[flang-commits] [flang] [mlir] [flang] Introduce omp.target_allocmem and omp.target_freemem omp dialect ops. (PR #145464)

Wed Jul 16 09:21:47 PDT 2025

https://github.com/skc7 updated https://github.com/llvm/llvm-project/pull/145464

>From 91d2c54266ba33fb01b30d81b6694914180ad982 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Mon, 23 Jun 2025 16:39:55 +0530
Subject: [PATCH 1/8] [flang] Introduce omp_target_allocmem and
 omp_target_freemem fir ops.

---
 .../include/flang/Optimizer/Dialect/FIROps.td |  58 ++++++++
 flang/lib/Optimizer/CodeGen/CodeGen.cpp       | 130 +++++++++++++++---
 2 files changed, 172 insertions(+), 16 deletions(-)

diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index 99b5105ab365e..de10334ed7b18 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -517,6 +517,64 @@ def fir_ZeroOp : fir_OneResultOp<"zero_bits", [NoMemoryEffect]> {
   let assemblyFormat = "type($intype) attr-dict";
 }
 
+def fir_OmpTargetAllocMemOp : fir_Op<"omp_target_allocmem",
+    [MemoryEffects<[MemAlloc<DefaultResource>]>, AttrSizedOperandSegments]> {
+  let summary = "allocate storage on an openmp device for an object of a given type";
+
+  let description = [{
+    Creates a heap memory reference suitable for storing a value of the
+    given type, T.  The heap refernce returned has type `!fir.heap<T>`.
+    The memory object is in an undefined state.  `omp_target_allocmem` operations must
+    be paired with `omp_target_freemem` operations to avoid memory leaks.
+
+    ```
+      %0 = "fir.omp_target_allocmem"(%device, %type) : (i32, index) -> !fir.heap<!fir.array<?xf32>>
+    ```
+  }];
+
+  let arguments = (ins
+    Arg<AnyIntegerType>:$device,
+    TypeAttr:$in_type,
+    OptionalAttr<StrAttr>:$uniq_name,
+    OptionalAttr<StrAttr>:$bindc_name,
+    Variadic<AnyIntegerType>:$typeparams,
+    Variadic<AnyIntegerType>:$shape
+  );
+  let results = (outs fir_HeapType);
+
+  let extraClassDeclaration = [{
+    mlir::Type getAllocatedType();
+    bool hasLenParams() { return !getTypeparams().empty(); }
+    bool hasShapeOperands() { return !getShape().empty(); }
+    unsigned numLenParams() { return getTypeparams().size(); }
+    operand_range getLenParams() { return getTypeparams(); }
+    unsigned numShapeOperands() { return getShape().size(); }
+    operand_range getShapeOperands() { return getShape(); }
+    static mlir::Type getRefTy(mlir::Type ty);
+  }];
+}
+
+def fir_OmpTargetFreeMemOp : fir_Op<"omp_target_freemem",
+  [MemoryEffects<[MemFree]>]> {
+  let summary = "free a heap object";
+
+  let description = [{
+    Deallocates a heap memory reference that was allocated by an `omp_target_allocmem`.
+    The memory object that is deallocated is placed in an undefined state
+    after `fir.omp_target_freemem`.
+    ```
+      %0 = "fir.omp_target_allocmem"(%device, %type) : (i32, index) -> !fir.heap<!fir.array<?xf32>>
+      ...
+      "fir.omp_target_freemem"(%device, %0) : (i32, !fir.heap<!fir.array<?xf32>>) -> ()
+    ```
+  }];
+
+  let arguments = (ins
+  Arg<AnyIntegerType, "", [MemFree]>:$device,
+  Arg<fir_HeapType, "", [MemFree]>:$heapref
+  );
+}
+
 //===----------------------------------------------------------------------===//
 // Terminator operations
 //===----------------------------------------------------------------------===//
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index ecc04a6c9a2be..fc7a21bcbe69f 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -1206,6 +1206,105 @@ struct FreeMemOpConversion : public fir::FIROpConversion<fir::FreeMemOp> {
 };
 } // namespace
 
+static mlir::LLVM::LLVMFuncOp getOmpTargetAlloc(mlir::Operation *op) {
+  auto module = op->getParentOfType<mlir::ModuleOp>();
+  if (mlir::LLVM::LLVMFuncOp mallocFunc =
+          module.lookupSymbol<mlir::LLVM::LLVMFuncOp>("omp_target_alloc"))
+    return mallocFunc;
+  mlir::OpBuilder moduleBuilder(module.getBodyRegion());
+  auto i64Ty = mlir::IntegerType::get(module->getContext(), 64);
+  auto i32Ty = mlir::IntegerType::get(module->getContext(), 32);
+  return moduleBuilder.create<mlir::LLVM::LLVMFuncOp>(
+      moduleBuilder.getUnknownLoc(), "omp_target_alloc",
+      mlir::LLVM::LLVMFunctionType::get(
+          mlir::LLVM::LLVMPointerType::get(module->getContext()),
+          {i64Ty, i32Ty},
+          /*isVarArg=*/false));
+}
+
+namespace {
+struct OmpTargetAllocMemOpConversion
+    : public fir::FIROpConversion<fir::OmpTargetAllocMemOp> {
+  using FIROpConversion::FIROpConversion;
+
+  mlir::LogicalResult
+  matchAndRewrite(fir::OmpTargetAllocMemOp heap, OpAdaptor adaptor,
+                  mlir::ConversionPatternRewriter &rewriter) const override {
+    mlir::Type heapTy = heap.getType();
+    mlir::LLVM::LLVMFuncOp mallocFunc = getOmpTargetAlloc(heap);
+    mlir::Location loc = heap.getLoc();
+    auto ity = lowerTy().indexType();
+    mlir::Type dataTy = fir::unwrapRefType(heapTy);
+    mlir::Type llvmObjectTy = convertObjectType(dataTy);
+    if (fir::isRecordWithTypeParameters(fir::unwrapSequenceType(dataTy)))
+      TODO(loc, "fir.omp_target_allocmem codegen of derived type with length "
+                "parameters");
+    mlir::Value size = genTypeSizeInBytes(loc, ity, rewriter, llvmObjectTy);
+    if (auto scaleSize = genAllocationScaleSize(heap, ity, rewriter))
+      size = rewriter.create<mlir::LLVM::MulOp>(loc, ity, size, scaleSize);
+    for (mlir::Value opnd : adaptor.getOperands().drop_front())
+      size = rewriter.create<mlir::LLVM::MulOp>(
+          loc, ity, size, integerCast(loc, rewriter, ity, opnd));
+    auto mallocTyWidth = lowerTy().getIndexTypeBitwidth();
+    auto mallocTy =
+        mlir::IntegerType::get(rewriter.getContext(), mallocTyWidth);
+    if (mallocTyWidth != ity.getIntOrFloatBitWidth())
+      size = integerCast(loc, rewriter, mallocTy, size);
+    heap->setAttr("callee", mlir::SymbolRefAttr::get(mallocFunc));
+    rewriter.replaceOpWithNewOp<mlir::LLVM::CallOp>(
+        heap, ::getLlvmPtrType(heap.getContext()),
+        mlir::SmallVector<mlir::Value, 2>({size, heap.getDevice()}),
+        addLLVMOpBundleAttrs(rewriter, heap->getAttrs(), 2));
+    return mlir::success();
+  }
+
+  /// Compute the allocation size in bytes of the element type of
+  /// \p llTy pointer type. The result is returned as a value of \p idxTy
+  /// integer type.
+  mlir::Value genTypeSizeInBytes(mlir::Location loc, mlir::Type idxTy,
+                                 mlir::ConversionPatternRewriter &rewriter,
+                                 mlir::Type llTy) const {
+    return computeElementDistance(loc, llTy, idxTy, rewriter, getDataLayout());
+  }
+};
+} // namespace
+
+static mlir::LLVM::LLVMFuncOp getOmpTargetFree(mlir::Operation *op) {
+  auto module = op->getParentOfType<mlir::ModuleOp>();
+  if (mlir::LLVM::LLVMFuncOp freeFunc =
+          module.lookupSymbol<mlir::LLVM::LLVMFuncOp>("omp_target_free"))
+    return freeFunc;
+  mlir::OpBuilder moduleBuilder(module.getBodyRegion());
+  auto i32Ty = mlir::IntegerType::get(module->getContext(), 32);
+  return moduleBuilder.create<mlir::LLVM::LLVMFuncOp>(
+      moduleBuilder.getUnknownLoc(), "omp_target_free",
+      mlir::LLVM::LLVMFunctionType::get(
+          mlir::LLVM::LLVMVoidType::get(module->getContext()),
+          {getLlvmPtrType(module->getContext()), i32Ty},
+          /*isVarArg=*/false));
+}
+
+namespace {
+struct OmpTargetFreeMemOpConversion
+    : public fir::FIROpConversion<fir::OmpTargetFreeMemOp> {
+  using FIROpConversion::FIROpConversion;
+
+  mlir::LogicalResult
+  matchAndRewrite(fir::OmpTargetFreeMemOp freemem, OpAdaptor adaptor,
+                  mlir::ConversionPatternRewriter &rewriter) const override {
+    mlir::LLVM::LLVMFuncOp freeFunc = getOmpTargetFree(freemem);
+    mlir::Location loc = freemem.getLoc();
+    freemem->setAttr("callee", mlir::SymbolRefAttr::get(freeFunc));
+    rewriter.create<mlir::LLVM::CallOp>(
+        loc, mlir::TypeRange{},
+        mlir::ValueRange{adaptor.getHeapref(), freemem.getDevice()},
+        addLLVMOpBundleAttrs(rewriter, freemem->getAttrs(), 2));
+    rewriter.eraseOp(freemem);
+    return mlir::success();
+  }
+};
+} // namespace
+
 // Convert subcomponent array indices from column-major to row-major ordering.
 static llvm::SmallVector<mlir::Value>
 convertSubcomponentIndices(mlir::Location loc, mlir::Type eleTy,
@@ -4328,22 +4427,21 @@ void fir::populateFIRToLLVMConversionPatterns(
       BoxTypeCodeOpConversion, BoxTypeDescOpConversion, CallOpConversion,
       CmpcOpConversion, VolatileCastOpConversion, ConvertOpConversion,
       CoordinateOpConversion, CopyOpConversion, DTEntryOpConversion,
-      DeclareOpConversion,
-      DoConcurrentSpecifierOpConversion<fir::LocalitySpecifierOp>,
-      DoConcurrentSpecifierOpConversion<fir::DeclareReductionOp>,
-      DivcOpConversion, EmboxOpConversion, EmboxCharOpConversion,
-      EmboxProcOpConversion, ExtractValueOpConversion, FieldIndexOpConversion,
-      FirEndOpConversion, FreeMemOpConversion, GlobalLenOpConversion,
-      GlobalOpConversion, InsertOnRangeOpConversion, IsPresentOpConversion,
-      LenParamIndexOpConversion, LoadOpConversion, MulcOpConversion,
-      NegcOpConversion, NoReassocOpConversion, SelectCaseOpConversion,
-      SelectOpConversion, SelectRankOpConversion, SelectTypeOpConversion,
-      ShapeOpConversion, ShapeShiftOpConversion, ShiftOpConversion,
-      SliceOpConversion, StoreOpConversion, StringLitOpConversion,
-      SubcOpConversion, TypeDescOpConversion, TypeInfoOpConversion,
-      UnboxCharOpConversion, UnboxProcOpConversion, UndefOpConversion,
-      UnreachableOpConversion, XArrayCoorOpConversion, XEmboxOpConversion,
-      XReboxOpConversion, ZeroOpConversion>(converter, options);
+      DeclareOpConversion, DivcOpConversion, EmboxOpConversion,
+      EmboxCharOpConversion, EmboxProcOpConversion, ExtractValueOpConversion,
+      FieldIndexOpConversion, FirEndOpConversion, FreeMemOpConversion,
+      GlobalLenOpConversion, GlobalOpConversion, InsertOnRangeOpConversion,
+      IsPresentOpConversion, LenParamIndexOpConversion, LoadOpConversion,
+      LocalitySpecifierOpConversion, MulcOpConversion, NegcOpConversion,
+      NoReassocOpConversion, OmpTargetAllocMemOpConversion,
+      OmpTargetFreeMemOpConversion, SelectCaseOpConversion, SelectOpConversion,
+      SelectRankOpConversion, SelectTypeOpConversion, ShapeOpConversion,
+      ShapeShiftOpConversion, ShiftOpConversion, SliceOpConversion,
+      StoreOpConversion, StringLitOpConversion, SubcOpConversion,
+      TypeDescOpConversion, TypeInfoOpConversion, UnboxCharOpConversion,
+      UnboxProcOpConversion, UndefOpConversion, UnreachableOpConversion,
+      XArrayCoorOpConversion, XEmboxOpConversion, XReboxOpConversion,
+      ZeroOpConversion>(converter, options);
 
   // Patterns that are populated without a type converter do not trigger
   // target materializations for the operands of the root op.

>From 72947c8592bf6c5300790ff8aff349e139017407 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Thu, 26 Jun 2025 10:20:36 +0530
Subject: [PATCH 2/8] [flang] Fix parsing and printing.

---
 .../include/flang/Optimizer/Dialect/FIROps.td | 13 ++-
 flang/lib/Optimizer/Dialect/FIROps.cpp        | 90 ++++++++++++++++---
 flang/test/Fir/omp_target_allocmem.fir        | 28 ++++++
 flang/test/Fir/omp_target_freemem.fir         | 28 ++++++
 4 files changed, 145 insertions(+), 14 deletions(-)
 create mode 100644 flang/test/Fir/omp_target_allocmem.fir
 create mode 100644 flang/test/Fir/omp_target_freemem.fir

diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index de10334ed7b18..8fbce5a8a745d 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -528,7 +528,8 @@ def fir_OmpTargetAllocMemOp : fir_Op<"omp_target_allocmem",
     be paired with `omp_target_freemem` operations to avoid memory leaks.
 
     ```
-      %0 = "fir.omp_target_allocmem"(%device, %type) : (i32, index) -> !fir.heap<!fir.array<?xf32>>
+      %device = arith.constant 0 : i32
+      %1 = fir.omp_target_allocmem %device : i32, !fir.array<3x3xi32>
     ```
   }];
 
@@ -542,6 +543,9 @@ def fir_OmpTargetAllocMemOp : fir_Op<"omp_target_allocmem",
   );
   let results = (outs fir_HeapType);
 
+  let hasCustomAssemblyFormat = 1;
+  let hasVerifier = 1;
+
   let extraClassDeclaration = [{
     mlir::Type getAllocatedType();
     bool hasLenParams() { return !getTypeparams().empty(); }
@@ -563,9 +567,9 @@ def fir_OmpTargetFreeMemOp : fir_Op<"omp_target_freemem",
     The memory object that is deallocated is placed in an undefined state
     after `fir.omp_target_freemem`.
     ```
-      %0 = "fir.omp_target_allocmem"(%device, %type) : (i32, index) -> !fir.heap<!fir.array<?xf32>>
-      ...
-      "fir.omp_target_freemem"(%device, %0) : (i32, !fir.heap<!fir.array<?xf32>>) -> ()
+      %device = arith.constant 0 : i32
+      %1 = fir.omp_target_allocmem %device : i32, !fir.array<3x3xi32>
+      fir.omp_target_freemem %device, %1 : i32, !fir.heap<!fir.array<?xf32>>
     ```
   }];
 
@@ -573,6 +577,7 @@ def fir_OmpTargetFreeMemOp : fir_Op<"omp_target_freemem",
   Arg<AnyIntegerType, "", [MemFree]>:$device,
   Arg<fir_HeapType, "", [MemFree]>:$heapref
   );
+  let assemblyFormat = "$device `,` $heapref attr-dict `:` type($device) `,` qualified(type($heapref))";
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index b6bf2753b80ce..38862953b8001 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -106,24 +106,38 @@ static bool verifyTypeParamCount(mlir::Type inType, unsigned numParams) {
   return false;
 }
 
-/// Parser shared by Alloca and Allocmem
-///
+/// Parser shared by Alloca, Allocmem and OmpTargetAllocmem.
+/// When isTargetOp is true (omp_target_allocmem), a device operand is parsed first.
 /// operation ::= %res = (`fir.alloca` | `fir.allocmem`) $in_type
 ///                      ( `(` $typeparams `)` )? ( `,` $shape )?
 ///                      attr-dict-without-keyword
+/// operation ::= %res = (`fir.omp_target_allocmem`) $device : devicetype,
+///                      $in_type ( `(` $typeparams `)` )? ( `,` $shape )?
+///                      attr-dict-without-keyword
 template <typename FN>
-static mlir::ParseResult parseAllocatableOp(FN wrapResultType,
-                                            mlir::OpAsmParser &parser,
-                                            mlir::OperationState &result) {
+static mlir::ParseResult
+parseAllocatableOp(FN wrapResultType, mlir::OpAsmParser &parser,
+                   mlir::OperationState &result, bool isTargetOp = false) {
+  auto &builder = parser.getBuilder();
+  bool hasOperands = false;
+  std::int32_t typeparamsSize = 0;
+  // Parse device number as a new operand
+  if (isTargetOp) {
+    mlir::OpAsmParser::UnresolvedOperand deviceOperand;
+    mlir::Type deviceType;
+    if (parser.parseOperand(deviceOperand) || parser.parseColonType(deviceType))
+      return mlir::failure();
+    if (parser.resolveOperand(deviceOperand, deviceType, result.operands))
+      return mlir::failure();
+    if (parser.parseComma())
+      return mlir::failure();
+  }
   mlir::Type intype;
   if (parser.parseType(intype))
     return mlir::failure();
-  auto &builder = parser.getBuilder();
   result.addAttribute("in_type", mlir::TypeAttr::get(intype));
   llvm::SmallVector<mlir::OpAsmParser::UnresolvedOperand> operands;
   llvm::SmallVector<mlir::Type> typeVec;
-  bool hasOperands = false;
-  std::int32_t typeparamsSize = 0;
   if (!parser.parseOptionalLParen()) {
     // parse the LEN params of the derived type. (<params> : <types>)
     if (parser.parseOperandList(operands, mlir::OpAsmParser::Delimiter::None) ||
@@ -147,13 +161,19 @@ static mlir::ParseResult parseAllocatableOp(FN wrapResultType,
       parser.resolveOperands(operands, typeVec, parser.getNameLoc(),
                              result.operands))
     return mlir::failure();
+
   mlir::Type restype = wrapResultType(intype);
   if (!restype) {
     parser.emitError(parser.getNameLoc(), "invalid allocate type: ") << intype;
     return mlir::failure();
   }
-  result.addAttribute("operandSegmentSizes", builder.getDenseI32ArrayAttr(
-                                                 {typeparamsSize, shapeSize}));
+  llvm::SmallVector<std::int32_t> segmentSizes;
+  if (isTargetOp)
+    segmentSizes.push_back(1);
+  segmentSizes.push_back(typeparamsSize);
+  segmentSizes.push_back(shapeSize);
+  result.addAttribute("operandSegmentSizes",
+                      builder.getDenseI32ArrayAttr(segmentSizes));
   if (parser.parseOptionalAttrDict(result.attributes) ||
       parser.addTypeToList(restype, result.types))
     return mlir::failure();
@@ -385,6 +405,56 @@ llvm::LogicalResult fir::AllocMemOp::verify() {
   return mlir::success();
 }
 
+//===----------------------------------------------------------------------===//
+// OmpTargetAllocMemOp
+//===----------------------------------------------------------------------===//
+
+mlir::Type fir::OmpTargetAllocMemOp::getAllocatedType() {
+  return mlir::cast<fir::HeapType>(getType()).getEleTy();
+}
+
+mlir::Type fir::OmpTargetAllocMemOp::getRefTy(mlir::Type ty) {
+  return fir::HeapType::get(ty);
+}
+
+mlir::ParseResult
+fir::OmpTargetAllocMemOp::parse(mlir::OpAsmParser &parser,
+                                mlir::OperationState &result) {
+  return parseAllocatableOp(wrapAllocMemResultType, parser, result, true);
+}
+
+void fir::OmpTargetAllocMemOp::print(mlir::OpAsmPrinter &p) {
+  p << " ";
+  p.printOperand(getDevice());
+  p << " : ";
+  p << getDevice().getType();
+  p << ", ";
+  p << getInType();
+  if (!getTypeparams().empty()) {
+    p << '(' << getTypeparams() << " : " << getTypeparams().getTypes() << ')';
+  }
+  for (auto sh : getShape()) {
+    p << ", ";
+    p.printOperand(sh);
+  }
+  p.printOptionalAttrDict((*this)->getAttrs(),
+                          {"in_type", "operandSegmentSizes"});
+}
+
+llvm::LogicalResult fir::OmpTargetAllocMemOp::verify() {
+  llvm::SmallVector<llvm::StringRef> visited;
+  if (verifyInType(getInType(), visited, numShapeOperands()))
+    return emitOpError("invalid type for allocation");
+  if (verifyTypeParamCount(getInType(), numLenParams()))
+    return emitOpError("LEN params do not correspond to type");
+  mlir::Type outType = getType();
+  if (!mlir::dyn_cast<fir::HeapType>(outType))
+    return emitOpError("must be a !fir.heap type");
+  if (fir::isa_unknown_size_box(fir::dyn_cast_ptrEleTy(outType)))
+    return emitOpError("cannot allocate !fir.box of unknown rank or type");
+  return mlir::success();
+}
+
 //===----------------------------------------------------------------------===//
 // ArrayCoorOp
 //===----------------------------------------------------------------------===//
diff --git a/flang/test/Fir/omp_target_allocmem.fir b/flang/test/Fir/omp_target_allocmem.fir
new file mode 100644
index 0000000000000..5140c91c9510c
--- /dev/null
+++ b/flang/test/Fir/omp_target_allocmem.fir
@@ -0,0 +1,28 @@
+// RUN: %flang_fc1 -emit-llvm  %s -o - | FileCheck %s
+
+// CHECK-LABEL: define ptr @omp_target_allocmem_array_of_nonchar(
+// CHECK: call ptr @omp_target_alloc(i64 36, i32 0)
+func.func @omp_target_allocmem_array_of_nonchar() -> !fir.heap<!fir.array<3x3xi32>> {
+  %device = arith.constant 0 : i32
+  %1 = fir.omp_target_allocmem %device : i32, !fir.array<3x3xi32>
+  return %1 : !fir.heap<!fir.array<3x3xi32>>
+}
+
+// CHECK-LABEL: define ptr @omp_target_allocmem_array_of_char(
+// CHECK: call ptr @omp_target_alloc(i64 90, i32 0)
+func.func @omp_target_allocmem_array_of_char() -> !fir.heap<!fir.array<3x3x!fir.char<1,10>>> {
+  %device = arith.constant 0 : i32
+  %1 = fir.omp_target_allocmem %device : i32, !fir.array<3x3x!fir.char<1,10>>
+  return %1 : !fir.heap<!fir.array<3x3x!fir.char<1,10>>>
+}
+
+// CHECK-LABEL: define ptr @omp_target_allocmem_array_of_dynchar(
+// CHECK-SAME: i32 %[[len:.*]])
+// CHECK: %[[mul1:.*]] = sext i32 %[[len]] to i64
+// CHECK: %[[mul2:.*]] = mul i64 9, %[[mul1]]
+// CHECK: call ptr @omp_target_alloc(i64 %[[mul2]], i32 0)
+func.func @omp_target_allocmem_array_of_dynchar(%l: i32) -> !fir.heap<!fir.array<3x3x!fir.char<1,?>>> {
+  %device = arith.constant 0 : i32
+  %1 = fir.omp_target_allocmem %device : i32, !fir.array<3x3x!fir.char<1,?>>(%l : i32)
+  return %1 : !fir.heap<!fir.array<3x3x!fir.char<1,?>>>
+}
diff --git a/flang/test/Fir/omp_target_freemem.fir b/flang/test/Fir/omp_target_freemem.fir
new file mode 100644
index 0000000000000..02e136076a9cf
--- /dev/null
+++ b/flang/test/Fir/omp_target_freemem.fir
@@ -0,0 +1,28 @@
+// RUN: %flang_fc1 -emit-llvm  %s -o - | FileCheck %s
+
+// CHECK-LABEL: define void @omp_target_allocmem_array_of_nonchar(
+// CHECK: call void @omp_target_free(ptr {{.*}}, i32 0)
+func.func @omp_target_allocmem_array_of_nonchar() -> () {
+  %device = arith.constant 0 : i32
+  %1 = fir.omp_target_allocmem %device : i32, !fir.array<3x3xi32>
+  fir.omp_target_freemem %device, %1 : i32, !fir.heap<!fir.array<3x3xi32>>
+  return
+}
+
+// CHECK-LABEL: define void @omp_target_allocmem_array_of_char(
+// CHECK: call void @omp_target_free(ptr {{.*}}, i32 0)
+func.func @omp_target_allocmem_array_of_char() -> () {
+  %device = arith.constant 0 : i32
+  %1 = fir.omp_target_allocmem %device : i32, !fir.array<3x3x!fir.char<1,10>>
+  fir.omp_target_freemem %device, %1 : i32, !fir.heap<!fir.array<3x3x!fir.char<1,10>>>
+  return
+}
+
+// CHECK-LABEL: define void @omp_target_allocmem_array_of_dynchar(
+// CHECK: call void @omp_target_free(ptr {{.*}}, i32 0)
+func.func @omp_target_allocmem_array_of_dynchar(%l: i32) -> () {
+  %device = arith.constant 0 : i32
+  %1 = fir.omp_target_allocmem %device : i32, !fir.array<3x3x!fir.char<1,?>>(%l : i32)
+  fir.omp_target_freemem %device, %1 : i32, !fir.heap<!fir.array<3x3x!fir.char<1,?>>>
+  return
+}

>From d929a4cf5877284e03c1bf0269ab9e2fa69aebc6 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Thu, 26 Jun 2025 10:32:21 +0530
Subject: [PATCH 3/8] [flang] Fix doc in td

---
 flang/include/flang/Optimizer/Dialect/FIROps.td | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index 8fbce5a8a745d..1587ddd244976 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -560,7 +560,7 @@ def fir_OmpTargetAllocMemOp : fir_Op<"omp_target_allocmem",
 
 def fir_OmpTargetFreeMemOp : fir_Op<"omp_target_freemem",
   [MemoryEffects<[MemFree]>]> {
-  let summary = "free a heap object";
+  let summary = "free a heap object on an openmp device";
 
   let description = [{
     Deallocates a heap memory reference that was allocated by an `omp_target_allocmem`.
@@ -569,7 +569,7 @@ def fir_OmpTargetFreeMemOp : fir_Op<"omp_target_freemem",
     ```
       %device = arith.constant 0 : i32
       %1 = fir.omp_target_allocmem %device : i32, !fir.array<3x3xi32>
-      fir.omp_target_freemem %device, %1 : i32, !fir.heap<!fir.array<?xf32>>
+      fir.omp_target_freemem %device, %1 : i32, !fir.heap<!fir.array<3x3xi32>>
     ```
   }];
 

>From 293c3fac9c0d37bff4f12631a6dc14672ec78242 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Thu, 3 Jul 2025 12:22:55 +0530
Subject: [PATCH 4/8] [omp][mlir] Introduce TargetAllocMem and TargetFreeMem
 ops in openMP mlir dialect

---
 .../include/flang/Optimizer/Dialect/FIROps.td |  63 -------
 flang/lib/Optimizer/CodeGen/CodeGen.cpp       | 102 +----------
 flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp | 161 ++++++++++++++++++
 flang/lib/Optimizer/Dialect/FIROps.cpp        |  88 +---------
 flang/test/Fir/omp_target_allocmem.fir        |  28 ---
 ...em.fir => omp_target_allocmem_freemem.fir} |  19 ++-
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td |  64 +++++++
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  | 102 +++++++++++
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      |  82 +++++++++
 .../ompenmp-target-allocmem-freemem.mlir      |  42 +++++
 10 files changed, 473 insertions(+), 278 deletions(-)
 delete mode 100644 flang/test/Fir/omp_target_allocmem.fir
 rename flang/test/Fir/{omp_target_freemem.fir => omp_target_allocmem_freemem.fir} (51%)
 create mode 100644 mlir/test/Target/LLVMIR/ompenmp-target-allocmem-freemem.mlir

diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index 1587ddd244976..99b5105ab365e 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -517,69 +517,6 @@ def fir_ZeroOp : fir_OneResultOp<"zero_bits", [NoMemoryEffect]> {
   let assemblyFormat = "type($intype) attr-dict";
 }
 
-def fir_OmpTargetAllocMemOp : fir_Op<"omp_target_allocmem",
-    [MemoryEffects<[MemAlloc<DefaultResource>]>, AttrSizedOperandSegments]> {
-  let summary = "allocate storage on an openmp device for an object of a given type";
-
-  let description = [{
-    Creates a heap memory reference suitable for storing a value of the
-    given type, T.  The heap refernce returned has type `!fir.heap<T>`.
-    The memory object is in an undefined state.  `omp_target_allocmem` operations must
-    be paired with `omp_target_freemem` operations to avoid memory leaks.
-
-    ```
-      %device = arith.constant 0 : i32
-      %1 = fir.omp_target_allocmem %device : i32, !fir.array<3x3xi32>
-    ```
-  }];
-
-  let arguments = (ins
-    Arg<AnyIntegerType>:$device,
-    TypeAttr:$in_type,
-    OptionalAttr<StrAttr>:$uniq_name,
-    OptionalAttr<StrAttr>:$bindc_name,
-    Variadic<AnyIntegerType>:$typeparams,
-    Variadic<AnyIntegerType>:$shape
-  );
-  let results = (outs fir_HeapType);
-
-  let hasCustomAssemblyFormat = 1;
-  let hasVerifier = 1;
-
-  let extraClassDeclaration = [{
-    mlir::Type getAllocatedType();
-    bool hasLenParams() { return !getTypeparams().empty(); }
-    bool hasShapeOperands() { return !getShape().empty(); }
-    unsigned numLenParams() { return getTypeparams().size(); }
-    operand_range getLenParams() { return getTypeparams(); }
-    unsigned numShapeOperands() { return getShape().size(); }
-    operand_range getShapeOperands() { return getShape(); }
-    static mlir::Type getRefTy(mlir::Type ty);
-  }];
-}
-
-def fir_OmpTargetFreeMemOp : fir_Op<"omp_target_freemem",
-  [MemoryEffects<[MemFree]>]> {
-  let summary = "free a heap object on an openmp device";
-
-  let description = [{
-    Deallocates a heap memory reference that was allocated by an `omp_target_allocmem`.
-    The memory object that is deallocated is placed in an undefined state
-    after `fir.omp_target_freemem`.
-    ```
-      %device = arith.constant 0 : i32
-      %1 = fir.omp_target_allocmem %device : i32, !fir.array<3x3xi32>
-      fir.omp_target_freemem %device, %1 : i32, !fir.heap<!fir.array<3x3xi32>>
-    ```
-  }];
-
-  let arguments = (ins
-  Arg<AnyIntegerType, "", [MemFree]>:$device,
-  Arg<fir_HeapType, "", [MemFree]>:$heapref
-  );
-  let assemblyFormat = "$device `,` $heapref attr-dict `:` type($device) `,` qualified(type($heapref))";
-}
-
 //===----------------------------------------------------------------------===//
 // Terminator operations
 //===----------------------------------------------------------------------===//
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index fc7a21bcbe69f..7940743763c5c 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -1206,105 +1206,6 @@ struct FreeMemOpConversion : public fir::FIROpConversion<fir::FreeMemOp> {
 };
 } // namespace
 
-static mlir::LLVM::LLVMFuncOp getOmpTargetAlloc(mlir::Operation *op) {
-  auto module = op->getParentOfType<mlir::ModuleOp>();
-  if (mlir::LLVM::LLVMFuncOp mallocFunc =
-          module.lookupSymbol<mlir::LLVM::LLVMFuncOp>("omp_target_alloc"))
-    return mallocFunc;
-  mlir::OpBuilder moduleBuilder(module.getBodyRegion());
-  auto i64Ty = mlir::IntegerType::get(module->getContext(), 64);
-  auto i32Ty = mlir::IntegerType::get(module->getContext(), 32);
-  return moduleBuilder.create<mlir::LLVM::LLVMFuncOp>(
-      moduleBuilder.getUnknownLoc(), "omp_target_alloc",
-      mlir::LLVM::LLVMFunctionType::get(
-          mlir::LLVM::LLVMPointerType::get(module->getContext()),
-          {i64Ty, i32Ty},
-          /*isVarArg=*/false));
-}
-
-namespace {
-struct OmpTargetAllocMemOpConversion
-    : public fir::FIROpConversion<fir::OmpTargetAllocMemOp> {
-  using FIROpConversion::FIROpConversion;
-
-  mlir::LogicalResult
-  matchAndRewrite(fir::OmpTargetAllocMemOp heap, OpAdaptor adaptor,
-                  mlir::ConversionPatternRewriter &rewriter) const override {
-    mlir::Type heapTy = heap.getType();
-    mlir::LLVM::LLVMFuncOp mallocFunc = getOmpTargetAlloc(heap);
-    mlir::Location loc = heap.getLoc();
-    auto ity = lowerTy().indexType();
-    mlir::Type dataTy = fir::unwrapRefType(heapTy);
-    mlir::Type llvmObjectTy = convertObjectType(dataTy);
-    if (fir::isRecordWithTypeParameters(fir::unwrapSequenceType(dataTy)))
-      TODO(loc, "fir.omp_target_allocmem codegen of derived type with length "
-                "parameters");
-    mlir::Value size = genTypeSizeInBytes(loc, ity, rewriter, llvmObjectTy);
-    if (auto scaleSize = genAllocationScaleSize(heap, ity, rewriter))
-      size = rewriter.create<mlir::LLVM::MulOp>(loc, ity, size, scaleSize);
-    for (mlir::Value opnd : adaptor.getOperands().drop_front())
-      size = rewriter.create<mlir::LLVM::MulOp>(
-          loc, ity, size, integerCast(loc, rewriter, ity, opnd));
-    auto mallocTyWidth = lowerTy().getIndexTypeBitwidth();
-    auto mallocTy =
-        mlir::IntegerType::get(rewriter.getContext(), mallocTyWidth);
-    if (mallocTyWidth != ity.getIntOrFloatBitWidth())
-      size = integerCast(loc, rewriter, mallocTy, size);
-    heap->setAttr("callee", mlir::SymbolRefAttr::get(mallocFunc));
-    rewriter.replaceOpWithNewOp<mlir::LLVM::CallOp>(
-        heap, ::getLlvmPtrType(heap.getContext()),
-        mlir::SmallVector<mlir::Value, 2>({size, heap.getDevice()}),
-        addLLVMOpBundleAttrs(rewriter, heap->getAttrs(), 2));
-    return mlir::success();
-  }
-
-  /// Compute the allocation size in bytes of the element type of
-  /// \p llTy pointer type. The result is returned as a value of \p idxTy
-  /// integer type.
-  mlir::Value genTypeSizeInBytes(mlir::Location loc, mlir::Type idxTy,
-                                 mlir::ConversionPatternRewriter &rewriter,
-                                 mlir::Type llTy) const {
-    return computeElementDistance(loc, llTy, idxTy, rewriter, getDataLayout());
-  }
-};
-} // namespace
-
-static mlir::LLVM::LLVMFuncOp getOmpTargetFree(mlir::Operation *op) {
-  auto module = op->getParentOfType<mlir::ModuleOp>();
-  if (mlir::LLVM::LLVMFuncOp freeFunc =
-          module.lookupSymbol<mlir::LLVM::LLVMFuncOp>("omp_target_free"))
-    return freeFunc;
-  mlir::OpBuilder moduleBuilder(module.getBodyRegion());
-  auto i32Ty = mlir::IntegerType::get(module->getContext(), 32);
-  return moduleBuilder.create<mlir::LLVM::LLVMFuncOp>(
-      moduleBuilder.getUnknownLoc(), "omp_target_free",
-      mlir::LLVM::LLVMFunctionType::get(
-          mlir::LLVM::LLVMVoidType::get(module->getContext()),
-          {getLlvmPtrType(module->getContext()), i32Ty},
-          /*isVarArg=*/false));
-}
-
-namespace {
-struct OmpTargetFreeMemOpConversion
-    : public fir::FIROpConversion<fir::OmpTargetFreeMemOp> {
-  using FIROpConversion::FIROpConversion;
-
-  mlir::LogicalResult
-  matchAndRewrite(fir::OmpTargetFreeMemOp freemem, OpAdaptor adaptor,
-                  mlir::ConversionPatternRewriter &rewriter) const override {
-    mlir::LLVM::LLVMFuncOp freeFunc = getOmpTargetFree(freemem);
-    mlir::Location loc = freemem.getLoc();
-    freemem->setAttr("callee", mlir::SymbolRefAttr::get(freeFunc));
-    rewriter.create<mlir::LLVM::CallOp>(
-        loc, mlir::TypeRange{},
-        mlir::ValueRange{adaptor.getHeapref(), freemem.getDevice()},
-        addLLVMOpBundleAttrs(rewriter, freemem->getAttrs(), 2));
-    rewriter.eraseOp(freemem);
-    return mlir::success();
-  }
-};
-} // namespace
-
 // Convert subcomponent array indices from column-major to row-major ordering.
 static llvm::SmallVector<mlir::Value>
 convertSubcomponentIndices(mlir::Location loc, mlir::Type eleTy,
@@ -4433,8 +4334,7 @@ void fir::populateFIRToLLVMConversionPatterns(
       GlobalLenOpConversion, GlobalOpConversion, InsertOnRangeOpConversion,
       IsPresentOpConversion, LenParamIndexOpConversion, LoadOpConversion,
       LocalitySpecifierOpConversion, MulcOpConversion, NegcOpConversion,
-      NoReassocOpConversion, OmpTargetAllocMemOpConversion,
-      OmpTargetFreeMemOpConversion, SelectCaseOpConversion, SelectOpConversion,
+      NoReassocOpConversion, SelectCaseOpConversion, SelectOpConversion,
       SelectRankOpConversion, SelectTypeOpConversion, ShapeOpConversion,
       ShapeShiftOpConversion, ShiftOpConversion, SliceOpConversion,
       StoreOpConversion, StringLitOpConversion, SubcOpConversion,
diff --git a/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp b/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp
index 37f1c9f97e1ce..a04c5d7eb7ee7 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp
@@ -125,10 +125,171 @@ struct PrivateClauseOpConversion
     return mlir::success();
   }
 };
+
+static mlir::LLVM::LLVMFuncOp getOmpTargetAlloc(mlir::Operation *op) {
+  auto module = op->getParentOfType<mlir::ModuleOp>();
+  if (mlir::LLVM::LLVMFuncOp mallocFunc =
+          module.lookupSymbol<mlir::LLVM::LLVMFuncOp>("omp_target_alloc"))
+    return mallocFunc;
+  mlir::OpBuilder moduleBuilder(module.getBodyRegion());
+  auto i64Ty = mlir::IntegerType::get(module->getContext(), 64);
+  auto i32Ty = mlir::IntegerType::get(module->getContext(), 32);
+  return moduleBuilder.create<mlir::LLVM::LLVMFuncOp>(
+      moduleBuilder.getUnknownLoc(), "omp_target_alloc",
+      mlir::LLVM::LLVMFunctionType::get(
+          mlir::LLVM::LLVMPointerType::get(module->getContext()),
+          {i64Ty, i32Ty},
+          /*isVarArg=*/false));
+}
+
+static mlir::Type
+convertObjectType(const fir::LLVMTypeConverter &converter, mlir::Type firType) {
+  if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(firType))
+    return converter.convertBoxTypeAsStruct(boxTy);
+  return converter.convertType(firType);
+}
+
+static llvm::SmallVector<mlir::NamedAttribute>
+addLLVMOpBundleAttrs(mlir::ConversionPatternRewriter &rewriter,
+                     llvm::ArrayRef<mlir::NamedAttribute> attrs,
+                     int32_t numCallOperands) {
+  llvm::SmallVector<mlir::NamedAttribute> newAttrs;
+  newAttrs.reserve(attrs.size() + 2);
+
+  for (mlir::NamedAttribute attr : attrs) {
+    if (attr.getName() != "operandSegmentSizes")
+      newAttrs.push_back(attr);
+  }
+
+  newAttrs.push_back(rewriter.getNamedAttr(
+      "operandSegmentSizes",
+      rewriter.getDenseI32ArrayAttr({numCallOperands, 0})));
+  newAttrs.push_back(rewriter.getNamedAttr("op_bundle_sizes",
+                                           rewriter.getDenseI32ArrayAttr({})));
+  return newAttrs;
+}
+
+static mlir::LLVM::ConstantOp
+genConstantIndex(mlir::Location loc, mlir::Type ity,
+                 mlir::ConversionPatternRewriter &rewriter,
+                 std::int64_t offset) {
+  auto cattr = rewriter.getI64IntegerAttr(offset);
+  return rewriter.create<mlir::LLVM::ConstantOp>(loc, ity, cattr);
+}
+
+static mlir::Value
+computeElementDistance(mlir::Location loc, mlir::Type llvmObjectType,
+                       mlir::Type idxTy,
+                       mlir::ConversionPatternRewriter &rewriter,
+                       const mlir::DataLayout &dataLayout) {
+  llvm::TypeSize size = dataLayout.getTypeSize(llvmObjectType);
+  unsigned short alignment = dataLayout.getTypeABIAlignment(llvmObjectType);
+  std::int64_t distance = llvm::alignTo(size, alignment);
+  return genConstantIndex(loc, idxTy, rewriter, distance);
+}
+
+static mlir::Value genTypeSizeInBytes(mlir::Location loc, mlir::Type idxTy,
+                                 mlir::ConversionPatternRewriter &rewriter,
+                                 mlir::Type llTy, const mlir::DataLayout &dataLayout) {
+  return computeElementDistance(loc, llTy, idxTy, rewriter, dataLayout);
+}
+
+template <typename OP>
+static mlir::Value
+genAllocationScaleSize(OP op, mlir::Type ity,
+                       mlir::ConversionPatternRewriter &rewriter) {
+  mlir::Location loc = op.getLoc();
+  mlir::Type dataTy = op.getInType();
+  auto seqTy = mlir::dyn_cast<fir::SequenceType>(dataTy);
+  fir::SequenceType::Extent constSize = 1;
+  if (seqTy) {
+    int constRows = seqTy.getConstantRows();
+    const fir::SequenceType::ShapeRef &shape = seqTy.getShape();
+    if (constRows != static_cast<int>(shape.size())) {
+      for (auto extent : shape) {
+        if (constRows-- > 0)
+          continue;
+        if (extent != fir::SequenceType::getUnknownExtent())
+          constSize *= extent;
+      }
+    }
+  }
+
+  if (constSize != 1) {
+    mlir::Value constVal{
+        genConstantIndex(loc, ity, rewriter, constSize).getResult()};
+    return constVal;
+  }
+  return nullptr;
+}
+
+static mlir::Value integerCast(const fir::LLVMTypeConverter &converter,
+    mlir::Location loc, mlir::ConversionPatternRewriter &rewriter,
+    mlir::Type ty, mlir::Value val, bool fold = false) {
+  auto valTy = val.getType();
+  // If the value was not yet lowered, lower its type so that it can
+  // be used in getPrimitiveTypeSizeInBits.
+  if (!mlir::isa<mlir::IntegerType>(valTy))
+    valTy = converter.convertType(valTy);
+  auto toSize = mlir::LLVM::getPrimitiveTypeSizeInBits(ty);
+  auto fromSize = mlir::LLVM::getPrimitiveTypeSizeInBits(valTy);
+  if (fold) {
+    if (toSize < fromSize)
+      return rewriter.createOrFold<mlir::LLVM::TruncOp>(loc, ty, val);
+    if (toSize > fromSize)
+      return rewriter.createOrFold<mlir::LLVM::SExtOp>(loc, ty, val);
+  } else {
+    if (toSize < fromSize)
+      return rewriter.create<mlir::LLVM::TruncOp>(loc, ty, val);
+    if (toSize > fromSize)
+      return rewriter.create<mlir::LLVM::SExtOp>(loc, ty, val);
+  }
+  return val;
+}
+
+// Converts omp.target_allocmem into an llvm.call to omp_target_alloc.
+struct TargetAllocMemOpConversion
+    : public OpenMPFIROpConversion<mlir::omp::TargetAllocMemOp> {
+  using OpenMPFIROpConversion::OpenMPFIROpConversion;
+
+  llvm::LogicalResult
+  matchAndRewrite(mlir::omp::TargetAllocMemOp allocmemOp, OpAdaptor adaptor,
+                  mlir::ConversionPatternRewriter &rewriter) const override {
+    mlir::Type heapTy = allocmemOp.getAllocatedType();
+    mlir::LLVM::LLVMFuncOp mallocFunc = getOmpTargetAlloc(allocmemOp);
+    mlir::Location loc = allocmemOp.getLoc();
+    auto ity = lowerTy().indexType();
+    mlir::Type dataTy = fir::unwrapRefType(heapTy);
+    mlir::Type llvmObjectTy = convertObjectType(lowerTy(), dataTy);
+    mlir::Type llvmPtrTy = mlir::LLVM::LLVMPointerType::get(allocmemOp.getContext(), 0);
+    if (fir::isRecordWithTypeParameters(fir::unwrapSequenceType(dataTy)))
+      TODO(loc, "omp.target_allocmem codegen of derived type with length "
+                "parameters");
+    mlir::Value size = genTypeSizeInBytes(loc, ity, rewriter, llvmObjectTy, lowerTy().getDataLayout());
+    if (auto scaleSize = genAllocationScaleSize(allocmemOp, ity, rewriter))
+      size = rewriter.create<mlir::LLVM::MulOp>(loc, ity, size, scaleSize);
+    for (mlir::Value opnd : adaptor.getOperands().drop_front())
+      size = rewriter.create<mlir::LLVM::MulOp>(
+          loc, ity, size, integerCast(lowerTy(), loc, rewriter, ity, opnd));
+    auto mallocTyWidth = lowerTy().getIndexTypeBitwidth();
+    auto mallocTy =
+        mlir::IntegerType::get(rewriter.getContext(), mallocTyWidth);
+    if (mallocTyWidth != ity.getIntOrFloatBitWidth())
+      size = integerCast(lowerTy(), loc, rewriter, mallocTy, size);
+    allocmemOp->setAttr("callee", mlir::SymbolRefAttr::get(mallocFunc));
+    auto callOp = rewriter.create<mlir::LLVM::CallOp>(
+        loc, llvmPtrTy,
+        mlir::SmallVector<mlir::Value, 2>({size, allocmemOp.getDevice()}),
+        addLLVMOpBundleAttrs(rewriter, allocmemOp->getAttrs(), 2));
+    rewriter.replaceOpWithNewOp<mlir::LLVM::PtrToIntOp>(allocmemOp, rewriter.getIntegerType(64), callOp.getResult());
+    return mlir::success();
+  }
+};
 } // namespace
 
 void fir::populateOpenMPFIRToLLVMConversionPatterns(
     const LLVMTypeConverter &converter, mlir::RewritePatternSet &patterns) {
   patterns.add<MapInfoOpConversion>(converter);
   patterns.add<PrivateClauseOpConversion>(converter);
+  patterns.add<TargetAllocMemOpConversion>(converter);
 }
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index 38862953b8001..4558057bec321 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -106,38 +106,24 @@ static bool verifyTypeParamCount(mlir::Type inType, unsigned numParams) {
   return false;
 }
 
-/// Parser shared by Alloca, Allocmem and OmpTargetAllocmem
+/// Parser shared by Alloca and Allocmem
 /// boolean flag isTargetOp is used to identify omp_target_allocmem
 /// operation ::= %res = (`fir.alloca` | `fir.allocmem`) $in_type
 ///                      ( `(` $typeparams `)` )? ( `,` $shape )?
 ///                      attr-dict-without-keyword
-/// operation ::= %res = (`fir.omp_target_alloca`) $device : devicetype,
-///                      $in_type ( `(` $typeparams `)` )? ( `,` $shape )?
-///                      attr-dict-without-keyword
 template <typename FN>
-static mlir::ParseResult
-parseAllocatableOp(FN wrapResultType, mlir::OpAsmParser &parser,
-                   mlir::OperationState &result, bool isTargetOp = false) {
-  auto &builder = parser.getBuilder();
-  bool hasOperands = false;
-  std::int32_t typeparamsSize = 0;
-  // Parse device number as a new operand
-  if (isTargetOp) {
-    mlir::OpAsmParser::UnresolvedOperand deviceOperand;
-    mlir::Type deviceType;
-    if (parser.parseOperand(deviceOperand) || parser.parseColonType(deviceType))
-      return mlir::failure();
-    if (parser.resolveOperand(deviceOperand, deviceType, result.operands))
-      return mlir::failure();
-    if (parser.parseComma())
-      return mlir::failure();
-  }
+static mlir::ParseResult parseAllocatableOp(FN wrapResultType,
+                                            mlir::OpAsmParser &parser,
+                                            mlir::OperationState &result) {
   mlir::Type intype;
   if (parser.parseType(intype))
     return mlir::failure();
+  auto &builder = parser.getBuilder();
   result.addAttribute("in_type", mlir::TypeAttr::get(intype));
   llvm::SmallVector<mlir::OpAsmParser::UnresolvedOperand> operands;
   llvm::SmallVector<mlir::Type> typeVec;
+  bool hasOperands = false;
+  std::int32_t typeparamsSize = 0;
   if (!parser.parseOptionalLParen()) {
     // parse the LEN params of the derived type. (<params> : <types>)
     if (parser.parseOperandList(operands, mlir::OpAsmParser::Delimiter::None) ||
@@ -161,19 +147,13 @@ parseAllocatableOp(FN wrapResultType, mlir::OpAsmParser &parser,
       parser.resolveOperands(operands, typeVec, parser.getNameLoc(),
                              result.operands))
     return mlir::failure();
-
   mlir::Type restype = wrapResultType(intype);
   if (!restype) {
     parser.emitError(parser.getNameLoc(), "invalid allocate type: ") << intype;
     return mlir::failure();
   }
-  llvm::SmallVector<std::int32_t> segmentSizes;
-  if (isTargetOp)
-    segmentSizes.push_back(1);
-  segmentSizes.push_back(typeparamsSize);
-  segmentSizes.push_back(shapeSize);
-  result.addAttribute("operandSegmentSizes",
-                      builder.getDenseI32ArrayAttr(segmentSizes));
+  result.addAttribute("operandSegmentSizes", builder.getDenseI32ArrayAttr(
+                                                 {typeparamsSize, shapeSize}));
   if (parser.parseOptionalAttrDict(result.attributes) ||
       parser.addTypeToList(restype, result.types))
     return mlir::failure();
@@ -405,56 +385,6 @@ llvm::LogicalResult fir::AllocMemOp::verify() {
   return mlir::success();
 }
 
-//===----------------------------------------------------------------------===//
-// OmpTargetAllocMemOp
-//===----------------------------------------------------------------------===//
-
-mlir::Type fir::OmpTargetAllocMemOp::getAllocatedType() {
-  return mlir::cast<fir::HeapType>(getType()).getEleTy();
-}
-
-mlir::Type fir::OmpTargetAllocMemOp::getRefTy(mlir::Type ty) {
-  return fir::HeapType::get(ty);
-}
-
-mlir::ParseResult
-fir::OmpTargetAllocMemOp::parse(mlir::OpAsmParser &parser,
-                                mlir::OperationState &result) {
-  return parseAllocatableOp(wrapAllocMemResultType, parser, result, true);
-}
-
-void fir::OmpTargetAllocMemOp::print(mlir::OpAsmPrinter &p) {
-  p << " ";
-  p.printOperand(getDevice());
-  p << " : ";
-  p << getDevice().getType();
-  p << ", ";
-  p << getInType();
-  if (!getTypeparams().empty()) {
-    p << '(' << getTypeparams() << " : " << getTypeparams().getTypes() << ')';
-  }
-  for (auto sh : getShape()) {
-    p << ", ";
-    p.printOperand(sh);
-  }
-  p.printOptionalAttrDict((*this)->getAttrs(),
-                          {"in_type", "operandSegmentSizes"});
-}
-
-llvm::LogicalResult fir::OmpTargetAllocMemOp::verify() {
-  llvm::SmallVector<llvm::StringRef> visited;
-  if (verifyInType(getInType(), visited, numShapeOperands()))
-    return emitOpError("invalid type for allocation");
-  if (verifyTypeParamCount(getInType(), numLenParams()))
-    return emitOpError("LEN params do not correspond to type");
-  mlir::Type outType = getType();
-  if (!mlir::dyn_cast<fir::HeapType>(outType))
-    return emitOpError("must be a !fir.heap type");
-  if (fir::isa_unknown_size_box(fir::dyn_cast_ptrEleTy(outType)))
-    return emitOpError("cannot allocate !fir.box of unknown rank or type");
-  return mlir::success();
-}
-
 //===----------------------------------------------------------------------===//
 // ArrayCoorOp
 //===----------------------------------------------------------------------===//
diff --git a/flang/test/Fir/omp_target_allocmem.fir b/flang/test/Fir/omp_target_allocmem.fir
deleted file mode 100644
index 5140c91c9510c..0000000000000
--- a/flang/test/Fir/omp_target_allocmem.fir
+++ /dev/null
@@ -1,28 +0,0 @@
-// RUN: %flang_fc1 -emit-llvm  %s -o - | FileCheck %s
-
-// CHECK-LABEL: define ptr @omp_target_allocmem_array_of_nonchar(
-// CHECK: call ptr @omp_target_alloc(i64 36, i32 0)
-func.func @omp_target_allocmem_array_of_nonchar() -> !fir.heap<!fir.array<3x3xi32>> {
-  %device = arith.constant 0 : i32
-  %1 = fir.omp_target_allocmem %device : i32, !fir.array<3x3xi32>
-  return %1 : !fir.heap<!fir.array<3x3xi32>>
-}
-
-// CHECK-LABEL: define ptr @omp_target_allocmem_array_of_char(
-// CHECK: call ptr @omp_target_alloc(i64 90, i32 0)
-func.func @omp_target_allocmem_array_of_char() -> !fir.heap<!fir.array<3x3x!fir.char<1,10>>> {
-  %device = arith.constant 0 : i32
-  %1 = fir.omp_target_allocmem %device : i32, !fir.array<3x3x!fir.char<1,10>>
-  return %1 : !fir.heap<!fir.array<3x3x!fir.char<1,10>>>
-}
-
-// CHECK-LABEL: define ptr @omp_target_allocmem_array_of_dynchar(
-// CHECK-SAME: i32 %[[len:.*]])
-// CHECK: %[[mul1:.*]] = sext i32 %[[len]] to i64
-// CHECK: %[[mul2:.*]] = mul i64 9, %[[mul1]]
-// CHECK: call ptr @omp_target_alloc(i64 %[[mul2]], i32 0)
-func.func @omp_target_allocmem_array_of_dynchar(%l: i32) -> !fir.heap<!fir.array<3x3x!fir.char<1,?>>> {
-  %device = arith.constant 0 : i32
-  %1 = fir.omp_target_allocmem %device : i32, !fir.array<3x3x!fir.char<1,?>>(%l : i32)
-  return %1 : !fir.heap<!fir.array<3x3x!fir.char<1,?>>>
-}
diff --git a/flang/test/Fir/omp_target_freemem.fir b/flang/test/Fir/omp_target_allocmem_freemem.fir
similarity index 51%
rename from flang/test/Fir/omp_target_freemem.fir
rename to flang/test/Fir/omp_target_allocmem_freemem.fir
index 02e136076a9cf..9202202728454 100644
--- a/flang/test/Fir/omp_target_freemem.fir
+++ b/flang/test/Fir/omp_target_allocmem_freemem.fir
@@ -1,28 +1,33 @@
 // RUN: %flang_fc1 -emit-llvm  %s -o - | FileCheck %s
 
 // CHECK-LABEL: define void @omp_target_allocmem_array_of_nonchar(
+// CHECK: call ptr @omp_target_alloc(i64 36, i32 0)
 // CHECK: call void @omp_target_free(ptr {{.*}}, i32 0)
 func.func @omp_target_allocmem_array_of_nonchar() -> () {
   %device = arith.constant 0 : i32
-  %1 = fir.omp_target_allocmem %device : i32, !fir.array<3x3xi32>
-  fir.omp_target_freemem %device, %1 : i32, !fir.heap<!fir.array<3x3xi32>>
+  %1 = omp.target_allocmem %device : i32, !fir.array<3x3xi32>
+  omp.target_freemem %device, %1 : i32, i64
   return
 }
 
 // CHECK-LABEL: define void @omp_target_allocmem_array_of_char(
+// CHECK: call ptr @omp_target_alloc(i64 90, i32 0)
 // CHECK: call void @omp_target_free(ptr {{.*}}, i32 0)
 func.func @omp_target_allocmem_array_of_char() -> () {
   %device = arith.constant 0 : i32
-  %1 = fir.omp_target_allocmem %device : i32, !fir.array<3x3x!fir.char<1,10>>
-  fir.omp_target_freemem %device, %1 : i32, !fir.heap<!fir.array<3x3x!fir.char<1,10>>>
+  %1 = omp.target_allocmem %device : i32, !fir.array<3x3x!fir.char<1,10>>
+  omp.target_freemem %device, %1 : i32, i64
   return
 }
 
 // CHECK-LABEL: define void @omp_target_allocmem_array_of_dynchar(
-// CHECK: call void @omp_target_free(ptr {{.*}}, i32 0)
+// CHECK-SAME: i32 %[[len:.*]])
+// CHECK: %[[mul1:.*]] = sext i32 %[[len]] to i64
+// CHECK: %[[mul2:.*]] = mul i64 9, %[[mul1]]
+// CHECK: call ptr @omp_target_alloc(i64 %[[mul2]], i32 0)
 func.func @omp_target_allocmem_array_of_dynchar(%l: i32) -> () {
   %device = arith.constant 0 : i32
-  %1 = fir.omp_target_allocmem %device : i32, !fir.array<3x3x!fir.char<1,?>>(%l : i32)
-  fir.omp_target_freemem %device, %1 : i32, !fir.heap<!fir.array<3x3x!fir.char<1,?>>>
+  %1 = omp.target_allocmem %device : i32, !fir.array<3x3x!fir.char<1,?>>(%l : i32)
+  omp.target_freemem %device, %1 : i32, i64
   return
 }
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 8cf18b43450ab..e81f57f404ea0 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -2113,4 +2113,68 @@ def AllocateDirOp : OpenMP_Op<"allocate_dir", clauses = [
   let hasVerifier = 1;
 }
 
+//===----------------------------------------------------------------------===//
+// TargetAllocMemOp
+//===----------------------------------------------------------------------===//
+
+def TargetAllocMemOp : OpenMP_Op<"target_allocmem",
+    [MemoryEffects<[MemAlloc<DefaultResource>]>, AttrSizedOperandSegments]> {
+  let summary = "allocate storage on an openmp device for an object of a given type";
+
+  let description = [{
+    Allocates memory on the specified OpenMP device for an object of the given type.
+    Returns an integer value representing the device pointer to the allocated memory.
+    The memory is uninitialized after allocation. Operations must be paired with 
+    `omp.target_freemem` to avoid memory leaks.
+
+    ```mlir
+      %device = arith.constant 0 : i32
+      %ptr = omp.target_allocmem %device : i32, vector<3x3xi32>
+    ```
+  }];
+
+  let arguments = (ins
+    Arg<AnyInteger>:$device,
+    TypeAttr:$in_type,
+    OptionalAttr<StrAttr>:$uniq_name,
+    OptionalAttr<StrAttr>:$bindc_name,
+    Variadic<AnyInteger>:$typeparams,
+    Variadic<AnyInteger>:$shape
+  );
+  let results = (outs I64);
+
+  let hasCustomAssemblyFormat = 1;
+  let hasVerifier = 1;
+
+  let extraClassDeclaration = [{
+    mlir::Type getAllocatedType();
+  }];
+}
+
+//===----------------------------------------------------------------------===//
+// TargetFreeMemOp
+//===----------------------------------------------------------------------===//
+
+def TargetFreeMemOp : OpenMP_Op<"target_freemem",
+  [MemoryEffects<[MemFree]>]> {
+  let summary = "free memory on an openmp device";
+
+  let description = [{
+    Deallocates memory on the specified OpenMP device that was previously 
+    allocated by an `omp.target_allocmem` operation. The memory is placed 
+    in an undefined state after deallocation.
+    ```mlir
+      %device = arith.constant 0 : i32
+      %ptr = omp.target_allocmem %device : i32, vector<3x3xi32>
+      omp.target_freemem %device, %ptr : i32, i64
+    ```
+  }];
+
+  let arguments = (ins
+  Arg<AnyInteger, "", [MemFree]>:$device,
+  Arg<I64, "", [MemFree]>:$heapref
+  );
+  let assemblyFormat = "$device `,` $heapref attr-dict `:` type($device) `,` qualified(type($heapref))";
+}
+
 #endif // OPENMP_OPS
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 769aee64e1695..dfd2f5f275a07 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -3878,6 +3878,108 @@ LogicalResult AllocateDirOp::verify() {
   return success();
 }
 
+//===----------------------------------------------------------------------===//
+// TargetAllocMemOp
+//===----------------------------------------------------------------------===//
+
+mlir::Type omp::TargetAllocMemOp::getAllocatedType() {
+  return getInTypeAttr().getValue();
+}
+
+/// operation ::= %res = (`omp.target_allocmem`) $device : devicetype,
+///                      $in_type ( `(` $typeparams `)` )? ( `,` $shape )?
+///                      attr-dict-without-keyword
+static mlir::ParseResult parseTargetAllocMemOp(mlir::OpAsmParser &parser,
+                                               mlir::OperationState &result) {
+  auto &builder = parser.getBuilder();
+  bool hasOperands = false;
+  std::int32_t typeparamsSize = 0;
+
+  // Parse device number as a new operand
+  mlir::OpAsmParser::UnresolvedOperand deviceOperand;
+  mlir::Type deviceType;
+  if (parser.parseOperand(deviceOperand) || parser.parseColonType(deviceType))
+    return mlir::failure();
+  if (parser.resolveOperand(deviceOperand, deviceType, result.operands))
+    return mlir::failure();
+  if (parser.parseComma())
+    return mlir::failure();
+
+  mlir::Type intype;
+  if (parser.parseType(intype))
+    return mlir::failure();
+  result.addAttribute("in_type", mlir::TypeAttr::get(intype));
+  llvm::SmallVector<mlir::OpAsmParser::UnresolvedOperand> operands;
+  llvm::SmallVector<mlir::Type> typeVec;
+  if (!parser.parseOptionalLParen()) {
+    // parse the LEN params of the derived type. (<params> : <types>)
+    if (parser.parseOperandList(operands, mlir::OpAsmParser::Delimiter::None) ||
+        parser.parseColonTypeList(typeVec) || parser.parseRParen())
+      return mlir::failure();
+    typeparamsSize = operands.size();
+    hasOperands = true;
+  }
+  std::int32_t shapeSize = 0;
+  if (!parser.parseOptionalComma()) {
+    // parse size to scale by, vector of n dimensions of type index
+    if (parser.parseOperandList(operands, mlir::OpAsmParser::Delimiter::None))
+      return mlir::failure();
+    shapeSize = operands.size() - typeparamsSize;
+    auto idxTy = builder.getIndexType();
+    for (std::int32_t i = typeparamsSize, end = operands.size(); i != end; ++i)
+      typeVec.push_back(idxTy);
+    hasOperands = true;
+  }
+  if (hasOperands &&
+      parser.resolveOperands(operands, typeVec, parser.getNameLoc(),
+                             result.operands))
+    return mlir::failure();
+
+  mlir::Type restype = builder.getIntegerType(64);
+  ;
+  if (!restype) {
+    parser.emitError(parser.getNameLoc(), "invalid allocate type: ") << intype;
+    return mlir::failure();
+  }
+  llvm::SmallVector<std::int32_t> segmentSizes{1, typeparamsSize, shapeSize};
+  result.addAttribute("operandSegmentSizes",
+                      builder.getDenseI32ArrayAttr(segmentSizes));
+  if (parser.parseOptionalAttrDict(result.attributes) ||
+      parser.addTypeToList(restype, result.types))
+    return mlir::failure();
+  return mlir::success();
+}
+
+mlir::ParseResult omp::TargetAllocMemOp::parse(mlir::OpAsmParser &parser,
+                                               mlir::OperationState &result) {
+  return parseTargetAllocMemOp(parser, result);
+}
+
+void omp::TargetAllocMemOp::print(mlir::OpAsmPrinter &p) {
+  p << " ";
+  p.printOperand(getDevice());
+  p << " : ";
+  p << getDevice().getType();
+  p << ", ";
+  p << getInType();
+  if (!getTypeparams().empty()) {
+    p << '(' << getTypeparams() << " : " << getTypeparams().getTypes() << ')';
+  }
+  for (auto sh : getShape()) {
+    p << ", ";
+    p.printOperand(sh);
+  }
+  p.printOptionalAttrDict((*this)->getAttrs(),
+                          {"in_type", "operandSegmentSizes"});
+}
+
+llvm::LogicalResult omp::TargetAllocMemOp::verify() {
+  mlir::Type outType = getType();
+  if (!mlir::dyn_cast<IntegerType>(outType))
+    return emitOpError("must be a integer type");
+  return mlir::success();
+}
+
 #define GET_ATTRDEF_CLASSES
 #include "mlir/Dialect/OpenMP/OpenMPOpsAttributes.cpp.inc"
 
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 3185f28fe6681..f805c286ec4f8 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -5858,6 +5858,82 @@ static bool isTargetDeviceOp(Operation *op) {
   return false;
 }
 
+static llvm::Function *getOmpTargetAlloc(llvm::IRBuilderBase &builder,
+                                         llvm::Module *llvmModule) {
+  llvm::Type *i64Ty = builder.getInt64Ty();
+  llvm::Type *i32Ty = builder.getInt32Ty();
+  llvm::Type *returnType = builder.getPtrTy(0);
+  llvm::FunctionType *fnType =
+      llvm::FunctionType::get(returnType, {i64Ty, i32Ty}, false);
+  llvm::Function *func = cast<llvm::Function>(
+      llvmModule->getOrInsertFunction("omp_target_alloc", fnType).getCallee());
+  return func;
+}
+
+static LogicalResult
+convertTargetAllocMemOp(Operation &opInst, llvm::IRBuilderBase &builder,
+                        LLVM::ModuleTranslation &moduleTranslation) {
+  auto allocMemOp = cast<omp::TargetAllocMemOp>(opInst);
+  if (!allocMemOp)
+    return failure();
+
+  // Get "omp_target_alloc" function
+  llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
+  llvm::Function *ompTargetAllocFunc = getOmpTargetAlloc(builder, llvmModule);
+  // Get the corresponding device value in llvm
+  mlir::Value deviceNum = allocMemOp.getDevice();
+  llvm::Value *llvmDeviceNum = moduleTranslation.lookupValue(deviceNum);
+  // Get the allocation size.
+  llvm::DataLayout dataLayout = llvmModule->getDataLayout();
+  mlir::Type heapTy = allocMemOp.getAllocatedType();
+  llvm::Type *llvmHeapTy = moduleTranslation.convertType(heapTy);
+  llvm::TypeSize typeSize = dataLayout.getTypeStoreSize(llvmHeapTy);
+  llvm::ConstantInt *allocSize = builder.getInt64(typeSize.getFixedValue());
+  // Create call to "omp_target_alloc" with the args as translated llvm values.
+  llvm::CallInst *call =
+      builder.CreateCall(ompTargetAllocFunc, {allocSize, llvmDeviceNum});
+  llvm::Value *resultI64 = builder.CreatePtrToInt(call, builder.getInt64Ty());
+
+  // Map the result
+  moduleTranslation.mapValue(allocMemOp.getResult(), resultI64);
+  return success();
+}
+
+static llvm::Function *getOmpTargetFree(llvm::IRBuilderBase &builder,
+                                        llvm::Module *llvmModule) {
+  llvm::Type *ptrTy = builder.getPtrTy(0);
+  llvm::Type *i32Ty = builder.getInt32Ty();
+  llvm::Type *voidTy = builder.getVoidTy();
+  llvm::FunctionType *fnType =
+      llvm::FunctionType::get(voidTy, {ptrTy, i32Ty}, false);
+  llvm::Function *func = dyn_cast<llvm::Function>(
+      llvmModule->getOrInsertFunction("omp_target_free", fnType).getCallee());
+  return func;
+}
+
+static LogicalResult
+convertTargetFreeMemOp(Operation &opInst, llvm::IRBuilderBase &builder,
+                       LLVM::ModuleTranslation &moduleTranslation) {
+  auto freeMemOp = cast<omp::TargetFreeMemOp>(opInst);
+  if (!freeMemOp)
+    return failure();
+
+  // Get "omp_target_free" function
+  llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
+  llvm::Function *ompTragetFreeFunc = getOmpTargetFree(builder, llvmModule);
+  // Get the corresponding device value in llvm
+  mlir::Value deviceNum = freeMemOp.getDevice();
+  llvm::Value *llvmDeviceNum = moduleTranslation.lookupValue(deviceNum);
+  // Get the corresponding heapref value in llvm
+  mlir::Value heapref = freeMemOp.getHeapref();
+  llvm::Value *llvmHeapref = moduleTranslation.lookupValue(heapref);
+  // Convert heapref int to ptr and call "omp_target_free"
+  llvm::Value *intToPtr =
+      builder.CreateIntToPtr(llvmHeapref, builder.getPtrTy(0));
+  builder.CreateCall(ompTragetFreeFunc, {intToPtr, llvmDeviceNum});
+  return success();
+}
+
 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR (including
 /// OpenMP runtime calls).
 static LogicalResult
@@ -6032,6 +6108,12 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
             // the omp.canonical_loop.
             return applyUnrollHeuristic(op, builder, moduleTranslation);
           })
+          .Case([&](omp::TargetAllocMemOp) {
+            return convertTargetAllocMemOp(*op, builder, moduleTranslation);
+          })
+          .Case([&](omp::TargetFreeMemOp) {
+            return convertTargetFreeMemOp(*op, builder, moduleTranslation);
+          })
           .Default([&](Operation *inst) {
             return inst->emitError()
                    << "not yet implemented: " << inst->getName();
diff --git a/mlir/test/Target/LLVMIR/ompenmp-target-allocmem-freemem.mlir b/mlir/test/Target/LLVMIR/ompenmp-target-allocmem-freemem.mlir
new file mode 100644
index 0000000000000..1bc97609ccff4
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/ompenmp-target-allocmem-freemem.mlir
@@ -0,0 +1,42 @@
+// RUN: mlir-opt %s -convert-openmp-to-llvm | mlir-translate -mlir-to-llvmir | FileCheck %s
+
+// This file contains MLIR test cases for omp.target_allocmem and omp.target_freemem
+
+// CHECK-LABEL: test_alloc_free_i64
+// CHECK: %[[ALLOC:.*]] = call ptr @omp_target_alloc(i64 8, i32 0)
+// CHECK: %[[PTRTOINT:.*]] = ptrtoint ptr %[[ALLOC]] to i64
+// CHECK: %[[INTTOPTR:.*]] = inttoptr i64 %[[PTRTOINT]] to ptr
+// CHECK: call void @omp_target_free(ptr %[[INTTOPTR]], i32 0)
+// CHECK: ret void
+llvm.func @test_alloc_free_i64() -> () {
+  %device = llvm.mlir.constant(0 : i32) : i32
+  %1 = omp.target_allocmem %device : i32, i64
+  omp.target_freemem %device, %1 : i32, i64
+  llvm.return
+}
+
+// CHECK-LABEL: test_alloc_free_vector_1d_f32
+// CHECK: %[[ALLOC:.*]] = call ptr @omp_target_alloc(i64 64, i32 0)
+// CHECK: %[[PTRTOINT:.*]] = ptrtoint ptr %[[ALLOC]] to i64
+// CHECK: %[[INTTOPTR:.*]] = inttoptr i64 %[[PTRTOINT]] to ptr
+// CHECK: call void @omp_target_free(ptr %[[INTTOPTR]], i32 0)
+// CHECK: ret void
+llvm.func @test_alloc_free_vector_1d_f32() -> () {
+  %device = llvm.mlir.constant(0 : i32) : i32
+  %1 = omp.target_allocmem %device : i32, vector<16xf32>
+  omp.target_freemem %device, %1 : i32, i64
+  llvm.return
+}
+
+// CHECK-LABEL: test_alloc_free_vector_2d_f32
+// CHECK: %[[ALLOC:.*]] = call ptr @omp_target_alloc(i64 1024, i32 0)
+// CHECK: %[[PTRTOINT:.*]] = ptrtoint ptr %[[ALLOC]] to i64
+// CHECK: %[[INTTOPTR:.*]] = inttoptr i64 %[[PTRTOINT]] to ptr
+// CHECK: call void @omp_target_free(ptr %[[INTTOPTR]], i32 0)
+// CHECK: ret void
+llvm.func @test_alloc_free_vector_2d_f32() -> () {
+  %device = llvm.mlir.constant(0 : i32) : i32
+  %1 = omp.target_allocmem %device : i32, vector<16x16xf32>
+  omp.target_freemem %device, %1 : i32, i64
+  llvm.return
+}

>From 43fcbe5680a14df8add324303ab8a8a0ef6f15cd Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Thu, 3 Jul 2025 21:05:35 +0530
Subject: [PATCH 5/8] Fix comments

---
 flang/lib/Optimizer/Dialect/FIROps.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index 4558057bec321..958fc46c9e41c 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -107,7 +107,6 @@ static bool verifyTypeParamCount(mlir::Type inType, unsigned numParams) {
 }
 
 /// Parser shared by Alloca and Allocmem
-/// boolean flag isTargetOp is used to identify omp_target_allocmem
 /// operation ::= %res = (`fir.alloca` | `fir.allocmem`) $in_type
 ///                      ( `(` $typeparams `)` )? ( `,` $shape )?
 ///                      attr-dict-without-keyword

>From 2d461fee214d5aa4d0cba9dcdea2ddf9c283ff2a Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Wed, 9 Jul 2025 15:49:06 +0530
Subject: [PATCH 6/8] clang format

---
 flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp | 24 ++++++++++++-------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp b/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp
index a04c5d7eb7ee7..14cc7bb511f0f 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp
@@ -142,8 +142,8 @@ static mlir::LLVM::LLVMFuncOp getOmpTargetAlloc(mlir::Operation *op) {
           /*isVarArg=*/false));
 }
 
-static mlir::Type
-convertObjectType(const fir::LLVMTypeConverter &converter, mlir::Type firType) {
+static mlir::Type convertObjectType(const fir::LLVMTypeConverter &converter,
+                                    mlir::Type firType) {
   if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(firType))
     return converter.convertBoxTypeAsStruct(boxTy);
   return converter.convertType(firType);
@@ -189,8 +189,9 @@ computeElementDistance(mlir::Location loc, mlir::Type llvmObjectType,
 }
 
 static mlir::Value genTypeSizeInBytes(mlir::Location loc, mlir::Type idxTy,
-                                 mlir::ConversionPatternRewriter &rewriter,
-                                 mlir::Type llTy, const mlir::DataLayout &dataLayout) {
+                                      mlir::ConversionPatternRewriter &rewriter,
+                                      mlir::Type llTy,
+                                      const mlir::DataLayout &dataLayout) {
   return computeElementDistance(loc, llTy, idxTy, rewriter, dataLayout);
 }
 
@@ -224,8 +225,10 @@ genAllocationScaleSize(OP op, mlir::Type ity,
 }
 
 static mlir::Value integerCast(const fir::LLVMTypeConverter &converter,
-    mlir::Location loc, mlir::ConversionPatternRewriter &rewriter,
-    mlir::Type ty, mlir::Value val, bool fold = false) {
+                               mlir::Location loc,
+                               mlir::ConversionPatternRewriter &rewriter,
+                               mlir::Type ty, mlir::Value val,
+                               bool fold = false) {
   auto valTy = val.getType();
   // If the value was not yet lowered, lower its type so that it can
   // be used in getPrimitiveTypeSizeInBits.
@@ -261,11 +264,13 @@ struct TargetAllocMemOpConversion
     auto ity = lowerTy().indexType();
     mlir::Type dataTy = fir::unwrapRefType(heapTy);
     mlir::Type llvmObjectTy = convertObjectType(lowerTy(), dataTy);
-    mlir::Type llvmPtrTy = mlir::LLVM::LLVMPointerType::get(allocmemOp.getContext(), 0);
+    mlir::Type llvmPtrTy =
+        mlir::LLVM::LLVMPointerType::get(allocmemOp.getContext(), 0);
     if (fir::isRecordWithTypeParameters(fir::unwrapSequenceType(dataTy)))
       TODO(loc, "omp.target_allocmem codegen of derived type with length "
                 "parameters");
-    mlir::Value size = genTypeSizeInBytes(loc, ity, rewriter, llvmObjectTy, lowerTy().getDataLayout());
+    mlir::Value size = genTypeSizeInBytes(loc, ity, rewriter, llvmObjectTy,
+                                          lowerTy().getDataLayout());
     if (auto scaleSize = genAllocationScaleSize(allocmemOp, ity, rewriter))
       size = rewriter.create<mlir::LLVM::MulOp>(loc, ity, size, scaleSize);
     for (mlir::Value opnd : adaptor.getOperands().drop_front())
@@ -281,7 +286,8 @@ struct TargetAllocMemOpConversion
         loc, llvmPtrTy,
         mlir::SmallVector<mlir::Value, 2>({size, allocmemOp.getDevice()}),
         addLLVMOpBundleAttrs(rewriter, allocmemOp->getAttrs(), 2));
-    rewriter.replaceOpWithNewOp<mlir::LLVM::PtrToIntOp>(allocmemOp, rewriter.getIntegerType(64), callOp.getResult());
+    rewriter.replaceOpWithNewOp<mlir::LLVM::PtrToIntOp>(
+        allocmemOp, rewriter.getIntegerType(64), callOp.getResult());
     return mlir::success();
   }
 };

>From ca569ebe850a92c704ae4b75469ce23708c3ae3d Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Mon, 14 Jul 2025 19:12:23 +0530
Subject: [PATCH 7/8] Update TargetAllocMemOpConversion. Move utility functions
 to utils

Co-authored by @ergawy
---
 flang/include/flang/Optimizer/Support/Utils.h |  63 ++++
 flang/lib/Optimizer/CodeGen/CodeGen.cpp       | 106 ++-----
 flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp | 148 +--------
 flang/lib/Optimizer/Support/Utils.cpp         |  51 ++++
 .../test/Fir/omp_target_allocmem_freemem.fir  | 281 +++++++++++++++++-
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td |   4 +-
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  |   1 -
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      |   5 +-
 8 files changed, 427 insertions(+), 232 deletions(-)

diff --git a/flang/include/flang/Optimizer/Support/Utils.h b/flang/include/flang/Optimizer/Support/Utils.h
index 83c936b7dcada..002261e61adb0 100644
--- a/flang/include/flang/Optimizer/Support/Utils.h
+++ b/flang/include/flang/Optimizer/Support/Utils.h
@@ -27,6 +27,8 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/StringRef.h"
 
+#include "flang/Optimizer/CodeGen/TypeConverter.h"
+
 namespace fir {
 /// Return the integer value of a arith::ConstantOp.
 inline std::int64_t toInt(mlir::arith::ConstantOp cop) {
@@ -198,6 +200,67 @@ std::optional<llvm::ArrayRef<int64_t>> getComponentLowerBoundsIfNonDefault(
     fir::RecordType recordType, llvm::StringRef component,
     mlir::ModuleOp module, const mlir::SymbolTable *symbolTable = nullptr);
 
+/// Convert a FIR type to its LLVM type without turning fir.box<T> into a
+/// memory reference.
+mlir::Type convertObjectType(const fir::LLVMTypeConverter &converter,
+                             mlir::Type firType);
+
+/// Generate an LLVM constant value of type `ity`, using the provided offset.
+mlir::LLVM::ConstantOp
+genConstantIndex(mlir::Location loc, mlir::Type ity,
+                 mlir::ConversionPatternRewriter &rewriter,
+                 std::int64_t offset);
+
+/// Helper function for generating the LLVM IR that computes the distance
+/// in bytes between adjacent elements of LLVM type \p llvmObjectType
+/// (its size rounded up to its ABI alignment). The result is returned as a
+/// value of \p idxTy integer type.
+mlir::Value computeElementDistance(mlir::Location loc,
+                                   mlir::Type llvmObjectType, mlir::Type idxTy,
+                                   mlir::ConversionPatternRewriter &rewriter,
+                                   const mlir::DataLayout &dataLayout);
+
+// Compute the alloc scale size (constant factors encoded in the array type).
+// We do this for arrays without a constant interior or arrays of characters
+// with dynamic length, since those are the only ones that get decayed to a
+// pointer to the element type.
+template <typename OP>
+inline mlir::Value
+genAllocationScaleSize(OP op, mlir::Type ity,
+                       mlir::ConversionPatternRewriter &rewriter) {
+  mlir::Location loc = op.getLoc();
+  mlir::Type dataTy = op.getInType();
+  auto seqTy = mlir::dyn_cast<fir::SequenceType>(dataTy);
+  fir::SequenceType::Extent constSize = 1;
+  if (seqTy) {
+    int constRows = seqTy.getConstantRows();
+    const fir::SequenceType::ShapeRef &shape = seqTy.getShape();
+    if (constRows != static_cast<int>(shape.size())) {
+      for (auto extent : shape) {
+        if (constRows-- > 0)
+          continue;
+        if (extent != fir::SequenceType::getUnknownExtent())
+          constSize *= extent;
+      }
+    }
+  }
+
+  if (constSize != 1) {
+    mlir::Value constVal{
+        fir::genConstantIndex(loc, ity, rewriter, constSize).getResult()};
+    return constVal;
+  }
+  return nullptr;
+}
+
+/// Perform an extension or truncation as needed on an integer value. Lowering
+/// to the specific target may involve some sign-extending or truncation of
+/// values, particularly to fit them from abstract box types to the
+/// appropriate reified structures.
+mlir::Value integerCast(const fir::LLVMTypeConverter &converter,
+                        mlir::Location loc,
+                        mlir::ConversionPatternRewriter &rewriter,
+                        mlir::Type ty, mlir::Value val, bool fold = false);
 } // namespace fir
 
 #endif // FORTRAN_OPTIMIZER_SUPPORT_UTILS_H
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 7940743763c5c..4cc1084aa56f8 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -85,14 +85,6 @@ static inline mlir::Type getI8Type(mlir::MLIRContext *context) {
   return mlir::IntegerType::get(context, 8);
 }
 
-static mlir::LLVM::ConstantOp
-genConstantIndex(mlir::Location loc, mlir::Type ity,
-                 mlir::ConversionPatternRewriter &rewriter,
-                 std::int64_t offset) {
-  auto cattr = rewriter.getI64IntegerAttr(offset);
-  return rewriter.create<mlir::LLVM::ConstantOp>(loc, ity, cattr);
-}
-
 static mlir::Block *createBlock(mlir::ConversionPatternRewriter &rewriter,
                                 mlir::Block *insertBefore) {
   assert(insertBefore && "expected valid insertion block");
@@ -203,39 +195,6 @@ getDependentTypeMemSizeFn(fir::RecordType recTy, fir::AllocaOp op,
   TODO(op.getLoc(), "did not find allocation function");
 }
 
-// Compute the alloc scale size (constant factors encoded in the array type).
-// We do this for arrays without a constant interior or arrays of character with
-// dynamic length arrays, since those are the only ones that get decayed to a
-// pointer to the element type.
-template <typename OP>
-static mlir::Value
-genAllocationScaleSize(OP op, mlir::Type ity,
-                       mlir::ConversionPatternRewriter &rewriter) {
-  mlir::Location loc = op.getLoc();
-  mlir::Type dataTy = op.getInType();
-  auto seqTy = mlir::dyn_cast<fir::SequenceType>(dataTy);
-  fir::SequenceType::Extent constSize = 1;
-  if (seqTy) {
-    int constRows = seqTy.getConstantRows();
-    const fir::SequenceType::ShapeRef &shape = seqTy.getShape();
-    if (constRows != static_cast<int>(shape.size())) {
-      for (auto extent : shape) {
-        if (constRows-- > 0)
-          continue;
-        if (extent != fir::SequenceType::getUnknownExtent())
-          constSize *= extent;
-      }
-    }
-  }
-
-  if (constSize != 1) {
-    mlir::Value constVal{
-        genConstantIndex(loc, ity, rewriter, constSize).getResult()};
-    return constVal;
-  }
-  return nullptr;
-}
-
 namespace {
 struct DeclareOpConversion : public fir::FIROpConversion<fir::cg::XDeclareOp> {
 public:
@@ -270,7 +229,7 @@ struct AllocaOpConversion : public fir::FIROpConversion<fir::AllocaOp> {
     auto loc = alloc.getLoc();
     mlir::Type ity = lowerTy().indexType();
     unsigned i = 0;
-    mlir::Value size = genConstantIndex(loc, ity, rewriter, 1).getResult();
+    mlir::Value size = fir::genConstantIndex(loc, ity, rewriter, 1).getResult();
     mlir::Type firObjType = fir::unwrapRefType(alloc.getType());
     mlir::Type llvmObjectType = convertObjectType(firObjType);
     if (alloc.hasLenParams()) {
@@ -302,7 +261,7 @@ struct AllocaOpConversion : public fir::FIROpConversion<fir::AllocaOp> {
                << scalarType << " with type parameters";
       }
     }
-    if (auto scaleSize = genAllocationScaleSize(alloc, ity, rewriter))
+    if (auto scaleSize = fir::genAllocationScaleSize(alloc, ity, rewriter))
       size =
           rewriter.createOrFold<mlir::LLVM::MulOp>(loc, ity, size, scaleSize);
     if (alloc.hasShapeOperands()) {
@@ -479,7 +438,7 @@ struct BoxIsArrayOpConversion : public fir::FIROpConversion<fir::BoxIsArrayOp> {
     auto loc = boxisarray.getLoc();
     TypePair boxTyPair = getBoxTypePair(boxisarray.getVal().getType());
     mlir::Value rank = getRankFromBox(loc, boxTyPair, a, rewriter);
-    mlir::Value c0 = genConstantIndex(loc, rank.getType(), rewriter, 0);
+    mlir::Value c0 = fir::genConstantIndex(loc, rank.getType(), rewriter, 0);
     rewriter.replaceOpWithNewOp<mlir::LLVM::ICmpOp>(
         boxisarray, mlir::LLVM::ICmpPredicate::ne, rank, c0);
     return mlir::success();
@@ -815,7 +774,7 @@ struct ConvertOpConversion : public fir::FIROpConversion<fir::ConvertOp> {
       // Do folding for constant inputs.
       if (auto constVal = fir::getIntIfConstant(op0)) {
         mlir::Value normVal =
-            genConstantIndex(loc, toTy, rewriter, *constVal ? 1 : 0);
+            fir::genConstantIndex(loc, toTy, rewriter, *constVal ? 1 : 0);
         rewriter.replaceOp(convert, normVal);
         return mlir::success();
       }
@@ -828,7 +787,7 @@ struct ConvertOpConversion : public fir::FIROpConversion<fir::ConvertOp> {
       }
 
       // Compare the input with zero.
-      mlir::Value zero = genConstantIndex(loc, fromTy, rewriter, 0);
+      mlir::Value zero = fir::genConstantIndex(loc, fromTy, rewriter, 0);
       auto isTrue = rewriter.create<mlir::LLVM::ICmpOp>(
           loc, mlir::LLVM::ICmpPredicate::ne, op0, zero);
 
@@ -1075,21 +1034,6 @@ static mlir::SymbolRefAttr getMalloc(fir::AllocMemOp op,
   return getMallocInModule(mod, op, rewriter, indexType);
 }
 
-/// Helper function for generating the LLVM IR that computes the distance
-/// in bytes between adjacent elements pointed to by a pointer
-/// of type \p ptrTy. The result is returned as a value of \p idxTy integer
-/// type.
-static mlir::Value
-computeElementDistance(mlir::Location loc, mlir::Type llvmObjectType,
-                       mlir::Type idxTy,
-                       mlir::ConversionPatternRewriter &rewriter,
-                       const mlir::DataLayout &dataLayout) {
-  llvm::TypeSize size = dataLayout.getTypeSize(llvmObjectType);
-  unsigned short alignment = dataLayout.getTypeABIAlignment(llvmObjectType);
-  std::int64_t distance = llvm::alignTo(size, alignment);
-  return genConstantIndex(loc, idxTy, rewriter, distance);
-}
-
 /// Return value of the stride in bytes between adjacent elements
 /// of LLVM type \p llTy. The result is returned as a value of
 /// \p idxTy integer type.
@@ -1098,7 +1042,7 @@ genTypeStrideInBytes(mlir::Location loc, mlir::Type idxTy,
                      mlir::ConversionPatternRewriter &rewriter, mlir::Type llTy,
                      const mlir::DataLayout &dataLayout) {
   // Create a pointer type and use computeElementDistance().
-  return computeElementDistance(loc, llTy, idxTy, rewriter, dataLayout);
+  return fir::computeElementDistance(loc, llTy, idxTy, rewriter, dataLayout);
 }
 
 namespace {
@@ -1117,7 +1061,7 @@ struct AllocMemOpConversion : public fir::FIROpConversion<fir::AllocMemOp> {
     if (fir::isRecordWithTypeParameters(fir::unwrapSequenceType(dataTy)))
       TODO(loc, "fir.allocmem codegen of derived type with length parameters");
     mlir::Value size = genTypeSizeInBytes(loc, ity, rewriter, llvmObjectTy);
-    if (auto scaleSize = genAllocationScaleSize(heap, ity, rewriter))
+    if (auto scaleSize = fir::genAllocationScaleSize(heap, ity, rewriter))
       size = rewriter.create<mlir::LLVM::MulOp>(loc, ity, size, scaleSize);
     for (mlir::Value opnd : adaptor.getOperands())
       size = rewriter.create<mlir::LLVM::MulOp>(
@@ -1140,7 +1084,8 @@ struct AllocMemOpConversion : public fir::FIROpConversion<fir::AllocMemOp> {
   mlir::Value genTypeSizeInBytes(mlir::Location loc, mlir::Type idxTy,
                                  mlir::ConversionPatternRewriter &rewriter,
                                  mlir::Type llTy) const {
-    return computeElementDistance(loc, llTy, idxTy, rewriter, getDataLayout());
+    return fir::computeElementDistance(loc, llTy, idxTy, rewriter,
+                                       getDataLayout());
   }
 };
 } // namespace
@@ -1324,7 +1269,7 @@ genCUFAllocDescriptor(mlir::Location loc,
   mlir::Type structTy = typeConverter.convertBoxTypeAsStruct(boxTy);
   std::size_t boxSize = dl->getTypeSizeInBits(structTy) / 8;
   mlir::Value sizeInBytes =
-      genConstantIndex(loc, llvmIntPtrType, rewriter, boxSize);
+      fir::genConstantIndex(loc, llvmIntPtrType, rewriter, boxSize);
   llvm::SmallVector args = {sizeInBytes, sourceFile, sourceLine};
   return rewriter
       .create<mlir::LLVM::CallOp>(loc, fctTy, RTNAME_STRING(CUFAllocDescriptor),
@@ -1580,7 +1525,7 @@ struct EmboxCommonConversion : public fir::FIROpConversion<OP> {
       // representation of derived types with pointer/allocatable components.
       // This has been seen in hashing algorithms using TRANSFER.
       mlir::Value zero =
-          genConstantIndex(loc, rewriter.getI64Type(), rewriter, 0);
+          fir::genConstantIndex(loc, rewriter.getI64Type(), rewriter, 0);
       descriptor = insertField(rewriter, loc, descriptor,
                                {getLenParamFieldId(boxTy), 0}, zero);
     }
@@ -1923,8 +1868,8 @@ struct XEmboxOpConversion : public EmboxCommonConversion<fir::cg::XEmboxOp> {
     bool hasSlice = !xbox.getSlice().empty();
     unsigned sliceOffset = xbox.getSliceOperandIndex();
     mlir::Location loc = xbox.getLoc();
-    mlir::Value zero = genConstantIndex(loc, i64Ty, rewriter, 0);
-    mlir::Value one = genConstantIndex(loc, i64Ty, rewriter, 1);
+    mlir::Value zero = fir::genConstantIndex(loc, i64Ty, rewriter, 0);
+    mlir::Value one = fir::genConstantIndex(loc, i64Ty, rewriter, 1);
     mlir::Value prevPtrOff = one;
     mlir::Type eleTy = boxTy.getEleTy();
     const unsigned rank = xbox.getRank();
@@ -1973,7 +1918,7 @@ struct XEmboxOpConversion : public EmboxCommonConversion<fir::cg::XEmboxOp> {
         prevDimByteStride =
             getCharacterByteSize(loc, rewriter, charTy, adaptor.getLenParams());
       } else {
-        prevDimByteStride = genConstantIndex(
+        prevDimByteStride = fir::genConstantIndex(
             loc, i64Ty, rewriter,
             charTy.getLen() * lowerTy().characterBitsize(charTy) / 8);
       }
@@ -2131,7 +2076,7 @@ struct XReboxOpConversion : public EmboxCommonConversion<fir::cg::XReboxOp> {
     if (auto charTy = mlir::dyn_cast<fir::CharacterType>(inputEleTy)) {
       if (charTy.hasConstantLen()) {
         mlir::Value len =
-            genConstantIndex(loc, idxTy, rewriter, charTy.getLen());
+            fir::genConstantIndex(loc, idxTy, rewriter, charTy.getLen());
         lenParams.emplace_back(len);
       } else {
         mlir::Value len = getElementSizeFromBox(loc, idxTy, inputBoxTyPair,
@@ -2140,7 +2085,7 @@ struct XReboxOpConversion : public EmboxCommonConversion<fir::cg::XReboxOp> {
           assert(!isInGlobalOp(rewriter) &&
                  "character target in global op must have constant length");
           mlir::Value width =
-              genConstantIndex(loc, idxTy, rewriter, charTy.getFKind());
+              fir::genConstantIndex(loc, idxTy, rewriter, charTy.getFKind());
           len = rewriter.create<mlir::LLVM::SDivOp>(loc, idxTy, len, width);
         }
         lenParams.emplace_back(len);
@@ -2194,8 +2139,9 @@ struct XReboxOpConversion : public EmboxCommonConversion<fir::cg::XReboxOp> {
                 mlir::ConversionPatternRewriter &rewriter) const {
     mlir::Location loc = rebox.getLoc();
     mlir::Value zero =
-        genConstantIndex(loc, lowerTy().indexType(), rewriter, 0);
-    mlir::Value one = genConstantIndex(loc, lowerTy().indexType(), rewriter, 1);
+        fir::genConstantIndex(loc, lowerTy().indexType(), rewriter, 0);
+    mlir::Value one =
+        fir::genConstantIndex(loc, lowerTy().indexType(), rewriter, 1);
     for (auto iter : llvm::enumerate(llvm::zip(extents, strides))) {
       mlir::Value extent = std::get<0>(iter.value());
       unsigned dim = iter.index();
@@ -2227,7 +2173,7 @@ struct XReboxOpConversion : public EmboxCommonConversion<fir::cg::XReboxOp> {
     mlir::Location loc = rebox.getLoc();
     mlir::Type byteTy = ::getI8Type(rebox.getContext());
     mlir::Type idxTy = lowerTy().indexType();
-    mlir::Value zero = genConstantIndex(loc, idxTy, rewriter, 0);
+    mlir::Value zero = fir::genConstantIndex(loc, idxTy, rewriter, 0);
     // Apply subcomponent and substring shift on base address.
     if (!rebox.getSubcomponent().empty() || !rebox.getSubstr().empty()) {
       // Cast to inputEleTy* so that a GEP can be used.
@@ -2255,7 +2201,7 @@ struct XReboxOpConversion : public EmboxCommonConversion<fir::cg::XReboxOp> {
     // and strides.
     llvm::SmallVector<mlir::Value> slicedExtents;
     llvm::SmallVector<mlir::Value> slicedStrides;
-    mlir::Value one = genConstantIndex(loc, idxTy, rewriter, 1);
+    mlir::Value one = fir::genConstantIndex(loc, idxTy, rewriter, 1);
     const bool sliceHasOrigins = !rebox.getShift().empty();
     unsigned sliceOps = rebox.getSliceOperandIndex();
     unsigned shiftOps = rebox.getShiftOperandIndex();
@@ -2328,7 +2274,7 @@ struct XReboxOpConversion : public EmboxCommonConversion<fir::cg::XReboxOp> {
     // which may be OK if all new extents are ones, the stride does not
     // matter, use one.
     mlir::Value stride = inputStrides.empty()
-                             ? genConstantIndex(loc, idxTy, rewriter, 1)
+                             ? fir::genConstantIndex(loc, idxTy, rewriter, 1)
                              : inputStrides[0];
     for (unsigned i = 0; i < rebox.getShape().size(); ++i) {
       mlir::Value rawExtent = operands[rebox.getShapeOperandIndex() + i];
@@ -2563,9 +2509,9 @@ struct XArrayCoorOpConversion
     unsigned shiftOffset = coor.getShiftOperandIndex();
     unsigned sliceOffset = coor.getSliceOperandIndex();
     auto sliceOps = coor.getSlice().begin();
-    mlir::Value one = genConstantIndex(loc, idxTy, rewriter, 1);
+    mlir::Value one = fir::genConstantIndex(loc, idxTy, rewriter, 1);
     mlir::Value prevExt = one;
-    mlir::Value offset = genConstantIndex(loc, idxTy, rewriter, 0);
+    mlir::Value offset = fir::genConstantIndex(loc, idxTy, rewriter, 0);
     const bool isShifted = !coor.getShift().empty();
     const bool isSliced = !coor.getSlice().empty();
     const bool baseIsBoxed =
@@ -2895,7 +2841,7 @@ struct CoordinateOpConversion
         // of lower bound aspects. This both accounts for dynamically sized
         // types and non contiguous arrays.
         auto idxTy = lowerTy().indexType();
-        mlir::Value off = genConstantIndex(loc, idxTy, rewriter, 0);
+        mlir::Value off = fir::genConstantIndex(loc, idxTy, rewriter, 0);
         unsigned arrayDim = arrTy.getDimension();
         for (unsigned dim = 0; dim < arrayDim && it != end; ++dim, ++it) {
           mlir::Value stride =
@@ -3808,7 +3754,7 @@ struct IsPresentOpConversion : public fir::FIROpConversion<fir::IsPresentOp> {
       ptr = rewriter.create<mlir::LLVM::ExtractValueOp>(loc, ptr, 0);
     }
     mlir::LLVM::ConstantOp c0 =
-        genConstantIndex(isPresent.getLoc(), idxTy, rewriter, 0);
+        fir::genConstantIndex(isPresent.getLoc(), idxTy, rewriter, 0);
     auto addr = rewriter.create<mlir::LLVM::PtrToIntOp>(loc, idxTy, ptr);
     rewriter.replaceOpWithNewOp<mlir::LLVM::ICmpOp>(
         isPresent, mlir::LLVM::ICmpPredicate::ne, addr, c0);
diff --git a/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp b/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp
index 14cc7bb511f0f..c8f9c2c154b95 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp
@@ -21,6 +21,7 @@
 #include "flang/Optimizer/Dialect/Support/FIRContext.h"
 #include "flang/Optimizer/Support/FatalError.h"
 #include "flang/Optimizer/Support/InternalNames.h"
+#include "flang/Optimizer/Support/Utils.h"
 #include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
 #include "mlir/Conversion/LLVMCommon/Pattern.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
@@ -126,130 +127,6 @@ struct PrivateClauseOpConversion
   }
 };
 
-static mlir::LLVM::LLVMFuncOp getOmpTargetAlloc(mlir::Operation *op) {
-  auto module = op->getParentOfType<mlir::ModuleOp>();
-  if (mlir::LLVM::LLVMFuncOp mallocFunc =
-          module.lookupSymbol<mlir::LLVM::LLVMFuncOp>("omp_target_alloc"))
-    return mallocFunc;
-  mlir::OpBuilder moduleBuilder(module.getBodyRegion());
-  auto i64Ty = mlir::IntegerType::get(module->getContext(), 64);
-  auto i32Ty = mlir::IntegerType::get(module->getContext(), 32);
-  return moduleBuilder.create<mlir::LLVM::LLVMFuncOp>(
-      moduleBuilder.getUnknownLoc(), "omp_target_alloc",
-      mlir::LLVM::LLVMFunctionType::get(
-          mlir::LLVM::LLVMPointerType::get(module->getContext()),
-          {i64Ty, i32Ty},
-          /*isVarArg=*/false));
-}
-
-static mlir::Type convertObjectType(const fir::LLVMTypeConverter &converter,
-                                    mlir::Type firType) {
-  if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(firType))
-    return converter.convertBoxTypeAsStruct(boxTy);
-  return converter.convertType(firType);
-}
-
-static llvm::SmallVector<mlir::NamedAttribute>
-addLLVMOpBundleAttrs(mlir::ConversionPatternRewriter &rewriter,
-                     llvm::ArrayRef<mlir::NamedAttribute> attrs,
-                     int32_t numCallOperands) {
-  llvm::SmallVector<mlir::NamedAttribute> newAttrs;
-  newAttrs.reserve(attrs.size() + 2);
-
-  for (mlir::NamedAttribute attr : attrs) {
-    if (attr.getName() != "operandSegmentSizes")
-      newAttrs.push_back(attr);
-  }
-
-  newAttrs.push_back(rewriter.getNamedAttr(
-      "operandSegmentSizes",
-      rewriter.getDenseI32ArrayAttr({numCallOperands, 0})));
-  newAttrs.push_back(rewriter.getNamedAttr("op_bundle_sizes",
-                                           rewriter.getDenseI32ArrayAttr({})));
-  return newAttrs;
-}
-
-static mlir::LLVM::ConstantOp
-genConstantIndex(mlir::Location loc, mlir::Type ity,
-                 mlir::ConversionPatternRewriter &rewriter,
-                 std::int64_t offset) {
-  auto cattr = rewriter.getI64IntegerAttr(offset);
-  return rewriter.create<mlir::LLVM::ConstantOp>(loc, ity, cattr);
-}
-
-static mlir::Value
-computeElementDistance(mlir::Location loc, mlir::Type llvmObjectType,
-                       mlir::Type idxTy,
-                       mlir::ConversionPatternRewriter &rewriter,
-                       const mlir::DataLayout &dataLayout) {
-  llvm::TypeSize size = dataLayout.getTypeSize(llvmObjectType);
-  unsigned short alignment = dataLayout.getTypeABIAlignment(llvmObjectType);
-  std::int64_t distance = llvm::alignTo(size, alignment);
-  return genConstantIndex(loc, idxTy, rewriter, distance);
-}
-
-static mlir::Value genTypeSizeInBytes(mlir::Location loc, mlir::Type idxTy,
-                                      mlir::ConversionPatternRewriter &rewriter,
-                                      mlir::Type llTy,
-                                      const mlir::DataLayout &dataLayout) {
-  return computeElementDistance(loc, llTy, idxTy, rewriter, dataLayout);
-}
-
-template <typename OP>
-static mlir::Value
-genAllocationScaleSize(OP op, mlir::Type ity,
-                       mlir::ConversionPatternRewriter &rewriter) {
-  mlir::Location loc = op.getLoc();
-  mlir::Type dataTy = op.getInType();
-  auto seqTy = mlir::dyn_cast<fir::SequenceType>(dataTy);
-  fir::SequenceType::Extent constSize = 1;
-  if (seqTy) {
-    int constRows = seqTy.getConstantRows();
-    const fir::SequenceType::ShapeRef &shape = seqTy.getShape();
-    if (constRows != static_cast<int>(shape.size())) {
-      for (auto extent : shape) {
-        if (constRows-- > 0)
-          continue;
-        if (extent != fir::SequenceType::getUnknownExtent())
-          constSize *= extent;
-      }
-    }
-  }
-
-  if (constSize != 1) {
-    mlir::Value constVal{
-        genConstantIndex(loc, ity, rewriter, constSize).getResult()};
-    return constVal;
-  }
-  return nullptr;
-}
-
-static mlir::Value integerCast(const fir::LLVMTypeConverter &converter,
-                               mlir::Location loc,
-                               mlir::ConversionPatternRewriter &rewriter,
-                               mlir::Type ty, mlir::Value val,
-                               bool fold = false) {
-  auto valTy = val.getType();
-  // If the value was not yet lowered, lower its type so that it can
-  // be used in getPrimitiveTypeSizeInBits.
-  if (!mlir::isa<mlir::IntegerType>(valTy))
-    valTy = converter.convertType(valTy);
-  auto toSize = mlir::LLVM::getPrimitiveTypeSizeInBits(ty);
-  auto fromSize = mlir::LLVM::getPrimitiveTypeSizeInBits(valTy);
-  if (fold) {
-    if (toSize < fromSize)
-      return rewriter.createOrFold<mlir::LLVM::TruncOp>(loc, ty, val);
-    if (toSize > fromSize)
-      return rewriter.createOrFold<mlir::LLVM::SExtOp>(loc, ty, val);
-  } else {
-    if (toSize < fromSize)
-      return rewriter.create<mlir::LLVM::TruncOp>(loc, ty, val);
-    if (toSize > fromSize)
-      return rewriter.create<mlir::LLVM::SExtOp>(loc, ty, val);
-  }
-  return val;
-}
-
 // FIR Op specific conversion for TargetAllocMemOp
 struct TargetAllocMemOpConversion
     : public OpenMPFIROpConversion<mlir::omp::TargetAllocMemOp> {
@@ -259,19 +136,16 @@ struct TargetAllocMemOpConversion
   matchAndRewrite(mlir::omp::TargetAllocMemOp allocmemOp, OpAdaptor adaptor,
                   mlir::ConversionPatternRewriter &rewriter) const override {
     mlir::Type heapTy = allocmemOp.getAllocatedType();
-    mlir::LLVM::LLVMFuncOp mallocFunc = getOmpTargetAlloc(allocmemOp);
     mlir::Location loc = allocmemOp.getLoc();
     auto ity = lowerTy().indexType();
     mlir::Type dataTy = fir::unwrapRefType(heapTy);
-    mlir::Type llvmObjectTy = convertObjectType(lowerTy(), dataTy);
-    mlir::Type llvmPtrTy =
-        mlir::LLVM::LLVMPointerType::get(allocmemOp.getContext(), 0);
+    mlir::Type llvmObjectTy = fir::convertObjectType(lowerTy(), dataTy);
     if (fir::isRecordWithTypeParameters(fir::unwrapSequenceType(dataTy)))
       TODO(loc, "omp.target_allocmem codegen of derived type with length "
                 "parameters");
-    mlir::Value size = genTypeSizeInBytes(loc, ity, rewriter, llvmObjectTy,
-                                          lowerTy().getDataLayout());
-    if (auto scaleSize = genAllocationScaleSize(allocmemOp, ity, rewriter))
+    mlir::Value size = fir::computeElementDistance(
+        loc, llvmObjectTy, ity, rewriter, lowerTy().getDataLayout());
+    if (auto scaleSize = fir::genAllocationScaleSize(allocmemOp, ity, rewriter))
       size = rewriter.create<mlir::LLVM::MulOp>(loc, ity, size, scaleSize);
     for (mlir::Value opnd : adaptor.getOperands().drop_front())
       size = rewriter.create<mlir::LLVM::MulOp>(
@@ -281,13 +155,11 @@ struct TargetAllocMemOpConversion
         mlir::IntegerType::get(rewriter.getContext(), mallocTyWidth);
     if (mallocTyWidth != ity.getIntOrFloatBitWidth())
       size = integerCast(lowerTy(), loc, rewriter, mallocTy, size);
-    allocmemOp->setAttr("callee", mlir::SymbolRefAttr::get(mallocFunc));
-    auto callOp = rewriter.create<mlir::LLVM::CallOp>(
-        loc, llvmPtrTy,
-        mlir::SmallVector<mlir::Value, 2>({size, allocmemOp.getDevice()}),
-        addLLVMOpBundleAttrs(rewriter, allocmemOp->getAttrs(), 2));
-    rewriter.replaceOpWithNewOp<mlir::LLVM::PtrToIntOp>(
-        allocmemOp, rewriter.getIntegerType(64), callOp.getResult());
+    rewriter.modifyOpInPlace(allocmemOp, [&]() {
+      allocmemOp.setInType(rewriter.getI8Type());
+      allocmemOp.getTypeparamsMutable().clear();
+      allocmemOp.getTypeparamsMutable().append(size);
+    });
     return mlir::success();
   }
 };
diff --git a/flang/lib/Optimizer/Support/Utils.cpp b/flang/lib/Optimizer/Support/Utils.cpp
index 5d663e28336c0..dbd42285e8fae 100644
--- a/flang/lib/Optimizer/Support/Utils.cpp
+++ b/flang/lib/Optimizer/Support/Utils.cpp
@@ -50,3 +50,54 @@ std::optional<llvm::ArrayRef<int64_t>> fir::getComponentLowerBoundsIfNonDefault(
       return componentInfo.getLowerBounds();
   return std::nullopt;
 }
+
+mlir::Type fir::convertObjectType(const fir::LLVMTypeConverter &converter,
+                                  mlir::Type firType) {
+  if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(firType))
+    return converter.convertBoxTypeAsStruct(boxTy);
+  return converter.convertType(firType);
+}
+
+mlir::LLVM::ConstantOp
+fir::genConstantIndex(mlir::Location loc, mlir::Type ity,
+                      mlir::ConversionPatternRewriter &rewriter,
+                      std::int64_t offset) {
+  auto cattr = rewriter.getI64IntegerAttr(offset);
+  return rewriter.create<mlir::LLVM::ConstantOp>(loc, ity, cattr);
+}
+
+mlir::Value
+fir::computeElementDistance(mlir::Location loc, mlir::Type llvmObjectType,
+                            mlir::Type idxTy,
+                            mlir::ConversionPatternRewriter &rewriter,
+                            const mlir::DataLayout &dataLayout) {
+  llvm::TypeSize size = dataLayout.getTypeSize(llvmObjectType);
+  unsigned short alignment = dataLayout.getTypeABIAlignment(llvmObjectType);
+  std::int64_t distance = llvm::alignTo(size, alignment);
+  return fir::genConstantIndex(loc, idxTy, rewriter, distance);
+}
+
+mlir::Value fir::integerCast(const fir::LLVMTypeConverter &converter,
+                             mlir::Location loc,
+                             mlir::ConversionPatternRewriter &rewriter,
+                             mlir::Type ty, mlir::Value val, bool fold) {
+  auto valTy = val.getType();
+  // If the value was not yet lowered, lower its type so that it can
+  // be used in getPrimitiveTypeSizeInBits.
+  if (!mlir::isa<mlir::IntegerType>(valTy))
+    valTy = converter.convertType(valTy);
+  auto toSize = mlir::LLVM::getPrimitiveTypeSizeInBits(ty);
+  auto fromSize = mlir::LLVM::getPrimitiveTypeSizeInBits(valTy);
+  if (fold) {
+    if (toSize < fromSize)
+      return rewriter.createOrFold<mlir::LLVM::TruncOp>(loc, ty, val);
+    if (toSize > fromSize)
+      return rewriter.createOrFold<mlir::LLVM::SExtOp>(loc, ty, val);
+  } else {
+    if (toSize < fromSize)
+      return rewriter.create<mlir::LLVM::TruncOp>(loc, ty, val);
+    if (toSize > fromSize)
+      return rewriter.create<mlir::LLVM::SExtOp>(loc, ty, val);
+  }
+  return val;
+}
diff --git a/flang/test/Fir/omp_target_allocmem_freemem.fir b/flang/test/Fir/omp_target_allocmem_freemem.fir
index 9202202728454..03eb94acb1ac7 100644
--- a/flang/test/Fir/omp_target_allocmem_freemem.fir
+++ b/flang/test/Fir/omp_target_allocmem_freemem.fir
@@ -1,8 +1,106 @@
 // RUN: %flang_fc1 -emit-llvm  %s -o - | FileCheck %s
 
-// CHECK-LABEL: define void @omp_target_allocmem_array_of_nonchar(
-// CHECK: call ptr @omp_target_alloc(i64 36, i32 0)
-// CHECK: call void @omp_target_free(ptr {{.*}}, i32 0)
+// UNSUPPORTED: system-windows
+// Disabled on 32-bit targets due to the additional `trunc` opcodes required
+// UNSUPPORTED: target-x86
+// UNSUPPORTED: target=sparc-{{.*}}
+// UNSUPPORTED: target=sparcel-{{.*}}
+
+// CHECK-LABEL: define void @omp_target_allocmem_scalar_nonchar() {
+// CHECK-NEXT:    [[TMP1:%.*]] = call ptr @omp_target_alloc(i64 4, i32 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
+// CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP3]], i32 0)
+// CHECK-NEXT:    ret void
+func.func @omp_target_allocmem_scalar_nonchar() -> () {
+  %device = arith.constant 0 : i32
+  %1 = omp.target_allocmem %device : i32, i32
+  omp.target_freemem %device, %1 : i32, i64
+  return
+}
+
+// CHECK-LABEL: define void @omp_target_allocmem_scalars_nonchar() {
+// CHECK-NEXT:    [[TMP1:%.*]] = call ptr @omp_target_alloc(i64 400, i32 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
+// CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP3]], i32 0)
+// CHECK-NEXT:    ret void
+func.func @omp_target_allocmem_scalars_nonchar() -> () {
+  %device = arith.constant 0 : i32
+  %0 = arith.constant 100 : index
+  %1 = omp.target_allocmem %device : i32, i32, %0
+  omp.target_freemem %device, %1 : i32, i64
+  return
+}
+
+// CHECK-LABEL: define void @omp_target_allocmem_scalar_char() {
+// CHECK-NEXT:    [[TMP1:%.*]] = call ptr @omp_target_alloc(i64 10, i32 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
+// CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP3]], i32 0)
+// CHECK-NEXT:    ret void
+func.func @omp_target_allocmem_scalar_char() -> () {
+  %device = arith.constant 0 : i32
+  %1 = omp.target_allocmem %device : i32, !fir.char<1,10>
+  omp.target_freemem %device, %1 : i32, i64
+  return
+}
+
+// CHECK-LABEL: define void @omp_target_allocmem_scalar_char_kind() {
+// CHECK-NEXT:    [[TMP1:%.*]] = call ptr @omp_target_alloc(i64 20, i32 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
+// CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP3]], i32 0)
+// CHECK-NEXT:    ret void
+func.func @omp_target_allocmem_scalar_char_kind() -> () {
+  %device = arith.constant 0 : i32
+  %1 = omp.target_allocmem %device : i32, !fir.char<2,10>
+  omp.target_freemem %device, %1 : i32, i64
+  return
+}
+
+// CHECK-LABEL: define void @omp_target_allocmem_scalar_dynchar(
+// CHECK-SAME: i32 [[TMP0:%.*]]) {
+// CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP0]] to i64
+// CHECK-NEXT:    [[TMP3:%.*]] = mul i64 1, [[TMP2]]
+// CHECK-NEXT:    [[TMP4:%.*]] = mul i64 1, [[TMP3]]
+// CHECK-NEXT:    [[TMP5:%.*]] = call ptr @omp_target_alloc(i64 [[TMP4]], i32 0)
+// CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64
+// CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP7]], i32 0)
+// CHECK-NEXT:    ret void
+func.func @omp_target_allocmem_scalar_dynchar(%l : i32) -> () {
+  %device = arith.constant 0 : i32
+  %1 = omp.target_allocmem %device : i32, !fir.char<1,?>(%l : i32)
+  omp.target_freemem %device, %1 : i32, i64
+  return
+}
+
+
+// CHECK-LABEL: define void @omp_target_allocmem_scalar_dynchar_kind(
+// CHECK-SAME: i32 [[TMP0:%.*]]) {
+// CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP0]] to i64
+// CHECK-NEXT:    [[TMP3:%.*]] = mul i64 2, [[TMP2]]
+// CHECK-NEXT:    [[TMP4:%.*]] = mul i64 1, [[TMP3]]
+// CHECK-NEXT:    [[TMP5:%.*]] = call ptr @omp_target_alloc(i64 [[TMP4]], i32 0)
+// CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64
+// CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP7]], i32 0)
+// CHECK-NEXT:    ret void
+func.func @omp_target_allocmem_scalar_dynchar_kind(%l : i32) -> () {
+  %device = arith.constant 0 : i32
+  %1 = omp.target_allocmem %device : i32, !fir.char<2,?>(%l : i32)
+  omp.target_freemem %device, %1 : i32, i64
+  return
+}
+
+
+// CHECK-LABEL: define void @omp_target_allocmem_array_of_nonchar() {
+// CHECK-NEXT:    [[TMP1:%.*]] = call ptr @omp_target_alloc(i64 36, i32 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
+// CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP3]], i32 0)
+// CHECK-NEXT:    ret void
 func.func @omp_target_allocmem_array_of_nonchar() -> () {
   %device = arith.constant 0 : i32
   %1 = omp.target_allocmem %device : i32, !fir.array<3x3xi32>
@@ -10,9 +108,12 @@ func.func @omp_target_allocmem_array_of_nonchar() -> () {
   return
 }
 
-// CHECK-LABEL: define void @omp_target_allocmem_array_of_char(
-// CHECK: call ptr @omp_target_alloc(i64 90, i32 0)
-// CHECK: call void @omp_target_free(ptr {{.*}}, i32 0)
+// CHECK-LABEL: define void @omp_target_allocmem_array_of_char() {
+// CHECK-NEXT:    [[TMP1:%.*]] = call ptr @omp_target_alloc(i64 90, i32 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
+// CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP3]], i32 0)
+// CHECK-NEXT:    ret void
 func.func @omp_target_allocmem_array_of_char() -> () {
   %device = arith.constant 0 : i32
   %1 = omp.target_allocmem %device : i32, !fir.array<3x3x!fir.char<1,10>>
@@ -21,13 +122,173 @@ func.func @omp_target_allocmem_array_of_char() -> () {
 }
 
 // CHECK-LABEL: define void @omp_target_allocmem_array_of_dynchar(
-// CHECK-SAME: i32 %[[len:.*]])
-// CHECK: %[[mul1:.*]] = sext i32 %[[len]] to i64
-// CHECK: %[[mul2:.*]] = mul i64 9, %[[mul1]]
-// CHECK: call ptr @omp_target_alloc(i64 %[[mul2]], i32 0)
+// CHECK-SAME: i32 [[TMP0:%.*]]) {
+// CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP0]] to i64
+// CHECK-NEXT:    [[TMP3:%.*]] = mul i64 9, [[TMP2]]
+// CHECK-NEXT:    [[TMP4:%.*]] = mul i64 1, [[TMP3]]
+// CHECK-NEXT:    [[TMP5:%.*]] = call ptr @omp_target_alloc(i64 [[TMP4]], i32 0)
+// CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64
+// CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP7]], i32 0)
+// CHECK-NEXT:    ret void
 func.func @omp_target_allocmem_array_of_dynchar(%l: i32) -> () {
   %device = arith.constant 0 : i32
   %1 = omp.target_allocmem %device : i32, !fir.array<3x3x!fir.char<1,?>>(%l : i32)
   omp.target_freemem %device, %1 : i32, i64
   return
 }
+
+
+// CHECK-LABEL: define void @omp_target_allocmem_dynarray_of_nonchar(
+// CHECK-SAME: i64 [[TMP0:%.*]]) {
+// CHECK-NEXT:    [[TMP2:%.*]] = mul i64 12, [[TMP0]]
+// CHECK-NEXT:    [[TMP3:%.*]] = mul i64 1, [[TMP2]]
+// CHECK-NEXT:    [[TMP4:%.*]] = call ptr @omp_target_alloc(i64 [[TMP3]], i32 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[TMP4]] to i64
+// CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP6]], i32 0)
+// CHECK-NEXT:    ret void
+func.func @omp_target_allocmem_dynarray_of_nonchar(%e: index) -> () {
+  %device = arith.constant 0 : i32
+  %1 = omp.target_allocmem %device : i32, !fir.array<3x?xi32>, %e
+  omp.target_freemem %device, %1 : i32, i64
+  return
+}
+
+// CHECK-LABEL: define void @omp_target_allocmem_dynarray_of_nonchar2(
+// CHECK-SAME: i64 [[TMP0:%.*]]) {
+// CHECK-NEXT:    [[TMP2:%.*]] = mul i64 4, [[TMP0]]
+// CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], [[TMP0]]
+// CHECK-NEXT:    [[TMP4:%.*]] = mul i64 1, [[TMP3]]
+// CHECK-NEXT:    [[TMP5:%.*]] = call ptr @omp_target_alloc(i64 [[TMP4]], i32 0)
+// CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64
+// CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP7]], i32 0)
+// CHECK-NEXT:    ret void
+func.func @omp_target_allocmem_dynarray_of_nonchar2(%e: index) -> () {
+  %device = arith.constant 0 : i32
+  %1 = omp.target_allocmem %device : i32, !fir.array<?x?xi32>, %e, %e
+  omp.target_freemem %device, %1 : i32, i64
+  return
+}
+
+// CHECK-LABEL: define void @omp_target_allocmem_dynarray_of_char(
+// CHECK-SAME: i64 [[TMP0:%.*]]) {
+// CHECK-NEXT:    [[TMP2:%.*]] = mul i64 60, [[TMP0]]
+// CHECK-NEXT:    [[TMP3:%.*]] = mul i64 1, [[TMP2]]
+// CHECK-NEXT:    [[TMP4:%.*]] = call ptr @omp_target_alloc(i64 [[TMP3]], i32 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[TMP4]] to i64
+// CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP6]], i32 0)
+// CHECK-NEXT:    ret void
+func.func @omp_target_allocmem_dynarray_of_char(%e : index) -> () {
+  %device = arith.constant 0 : i32
+  %1 = omp.target_allocmem %device : i32, !fir.array<3x?x!fir.char<2,10>>, %e
+  omp.target_freemem %device, %1 : i32, i64
+  return
+}
+
+
+// CHECK-LABEL: define void @omp_target_allocmem_dynarray_of_char2(
+// CHECK-SAME: i64 [[TMP0:%.*]]) {
+// CHECK-NEXT:    [[TMP2:%.*]] = mul i64 20, [[TMP0]]
+// CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], [[TMP0]]
+// CHECK-NEXT:    [[TMP4:%.*]] = mul i64 1, [[TMP3]]
+// CHECK-NEXT:    [[TMP5:%.*]] = call ptr @omp_target_alloc(i64 [[TMP4]], i32 0)
+// CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64
+// CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP7]], i32 0)
+// CHECK-NEXT:    ret void
+func.func @omp_target_allocmem_dynarray_of_char2(%e : index) -> () {
+  %device = arith.constant 0 : i32
+  %1 = omp.target_allocmem %device : i32, !fir.array<?x?x!fir.char<2,10>>, %e, %e
+  omp.target_freemem %device, %1 : i32, i64
+  return
+}
+
+// CHECK-LABEL: define void @omp_target_allocmem_dynarray_of_dynchar(
+// CHECK-SAME: i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) {
+// CHECK-NEXT:    [[TMP3:%.*]] = sext i32 [[TMP0]] to i64
+// CHECK-NEXT:    [[TMP4:%.*]] = mul i64 6, [[TMP3]]
+// CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], [[TMP1]]
+// CHECK-NEXT:    [[TMP6:%.*]] = mul i64 1, [[TMP5]]
+// CHECK-NEXT:    [[TMP7:%.*]] = call ptr @omp_target_alloc(i64 [[TMP6]], i32 0)
+// CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+// CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP9]], i32 0)
+// CHECK-NEXT:    ret void
+func.func @omp_target_allocmem_dynarray_of_dynchar(%l: i32, %e : index) -> () {
+  %device = arith.constant 0 : i32
+  %1 = omp.target_allocmem %device : i32, !fir.array<3x?x!fir.char<2,?>>(%l : i32), %e
+  omp.target_freemem %device, %1 : i32, i64
+  return
+}
+
+// CHECK-LABEL: define void @omp_target_allocmem_dynarray_of_dynchar2(
+// CHECK-SAME: i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) {
+// CHECK-NEXT:    [[TMP3:%.*]] = sext i32 [[TMP0]] to i64
+// CHECK-NEXT:    [[TMP4:%.*]] = mul i64 2, [[TMP3]]
+// CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], [[TMP1]]
+// CHECK-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], [[TMP1]]
+// CHECK-NEXT:    [[TMP7:%.*]] = mul i64 1, [[TMP6]]
+// CHECK-NEXT:    [[TMP8:%.*]] = call ptr @omp_target_alloc(i64 [[TMP7]], i32 0)
+// CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64
+// CHECK-NEXT:    [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP10]], i32 0)
+// CHECK-NEXT:    ret void
+func.func @omp_target_allocmem_dynarray_of_dynchar2(%l: i32, %e : index) -> () {
+  %device = arith.constant 0 : i32
+  %1 = omp.target_allocmem %device : i32, !fir.array<?x?x!fir.char<2,?>>(%l : i32), %e, %e
+  omp.target_freemem %device, %1 : i32, i64
+  return
+}
+
+// CHECK-LABEL: define void @omp_target_allocmem_array_with_holes_nonchar(
+// CHECK-SAME: i64 [[TMP0:%.*]], i64 [[TMP1:%.*]]) {
+// CHECK-NEXT:    [[TMP3:%.*]] = mul i64 240, [[TMP0]]
+// CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], [[TMP1]]
+// CHECK-NEXT:    [[TMP5:%.*]] = mul i64 1, [[TMP4]]
+// CHECK-NEXT:    [[TMP6:%.*]] = call ptr @omp_target_alloc(i64 [[TMP5]], i32 0)
+// CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[TMP6]] to i64
+// CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP8]], i32 0)
+// CHECK-NEXT:    ret void
+func.func @omp_target_allocmem_array_with_holes_nonchar(%0 : index, %1 : index) -> () {
+  %device = arith.constant 0 : i32
+  %2 = omp.target_allocmem %device : i32, !fir.array<4x?x3x?x5xi32>, %0, %1
+  omp.target_freemem %device, %2 : i32, i64
+  return
+}
+
+// CHECK-LABEL: define void @omp_target_allocmem_array_with_holes_char(
+// CHECK-SAME: i64 [[TMP0:%.*]]) {
+// CHECK-NEXT:    [[TMP2:%.*]] = mul i64 240, [[TMP0]]
+// CHECK-NEXT:    [[TMP3:%.*]] = mul i64 1, [[TMP2]]
+// CHECK-NEXT:    [[TMP4:%.*]] = call ptr @omp_target_alloc(i64 [[TMP3]], i32 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[TMP4]] to i64
+// CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP6]], i32 0)
+// CHECK-NEXT:    ret void
+func.func @omp_target_allocmem_array_with_holes_char(%e: index) -> () {
+  %device = arith.constant 0 : i32
+  %1 = omp.target_allocmem %device : i32, !fir.array<3x?x4x!fir.char<2,10>>, %e
+  omp.target_freemem %device, %1 : i32, i64
+  return
+}
+
+// CHECK-LABEL: define void @omp_target_allocmem_array_with_holes_dynchar(
+// CHECK-SAME: i64 [[TMP0:%.*]], i64 [[TMP1:%.*]]) {
+// CHECK-NEXT:    [[TMP3:%.*]] = mul i64 24, [[TMP0]]
+// CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], [[TMP1]]
+// CHECK-NEXT:    [[TMP5:%.*]] = mul i64 1, [[TMP4]]
+// CHECK-NEXT:    [[TMP6:%.*]] = call ptr @omp_target_alloc(i64 [[TMP5]], i32 0)
+// CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[TMP6]] to i64
+// CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+// CHECK-NEXT:    call void @omp_target_free(ptr [[TMP8]], i32 0)
+// CHECK-NEXT:    ret void
+func.func @omp_target_allocmem_array_with_holes_dynchar(%arg0: index, %arg1: index) -> () {
+  %device = arith.constant 0 : i32
+  %1 = omp.target_allocmem %device : i32, !fir.array<3x?x4x!fir.char<2,?>>(%arg0 : index), %arg1
+  omp.target_freemem %device, %1 : i32, i64
+  return
+}
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index e81f57f404ea0..f81e8eac97234 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -2138,8 +2138,8 @@ def TargetAllocMemOp : OpenMP_Op<"target_allocmem",
     TypeAttr:$in_type,
     OptionalAttr<StrAttr>:$uniq_name,
     OptionalAttr<StrAttr>:$bindc_name,
-    Variadic<AnyInteger>:$typeparams,
-    Variadic<AnyInteger>:$shape
+    Variadic<IntLikeType>:$typeparams,
+    Variadic<IntLikeType>:$shape
   );
   let results = (outs I64);
 
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index dfd2f5f275a07..49a26d8cd156f 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -3936,7 +3936,6 @@ static mlir::ParseResult parseTargetAllocMemOp(mlir::OpAsmParser &parser,
     return mlir::failure();
 
   mlir::Type restype = builder.getIntegerType(64);
-  ;
   if (!restype) {
     parser.emitError(parser.getNameLoc(), "invalid allocate type: ") << intype;
     return mlir::failure();
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index f805c286ec4f8..145433faaf5c9 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -5888,7 +5888,10 @@ convertTargetAllocMemOp(Operation &opInst, llvm::IRBuilderBase &builder,
   mlir::Type heapTy = allocMemOp.getAllocatedType();
   llvm::Type *llvmHeapTy = moduleTranslation.convertType(heapTy);
   llvm::TypeSize typeSize = dataLayout.getTypeStoreSize(llvmHeapTy);
-  llvm::ConstantInt *allocSize = builder.getInt64(typeSize.getFixedValue());
+  llvm::Value *allocSize = builder.getInt64(typeSize.getFixedValue());
+  for (auto typeParam : allocMemOp.getTypeparams())
+    allocSize =
+        builder.CreateMul(allocSize, moduleTranslation.lookupValue(typeParam));
   // Create call to "omp_target_alloc" with the args as translated llvm values.
   llvm::CallInst *call =
       builder.CreateCall(ompTargetAllocFunc, {allocSize, llvmDeviceNum});

>From 6ef5914b24fcb43c1a5cacad65a766309f2f7020 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Wed, 16 Jul 2025 21:50:32 +0530
Subject: [PATCH 8/8] [OpenMP] Update description for target_allocmem and
 target_freemem.

---
 flang/include/flang/Optimizer/Support/Utils.h | 31 ++---------
 flang/lib/Optimizer/CodeGen/CodeGen.cpp       | 36 +++++++------
 flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp |  3 +-
 flang/lib/Optimizer/Support/Utils.cpp         | 27 ++++++++++
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 52 +++++++++++++++----
 5 files changed, 93 insertions(+), 56 deletions(-)

diff --git a/flang/include/flang/Optimizer/Support/Utils.h b/flang/include/flang/Optimizer/Support/Utils.h
index 002261e61adb0..ae1efb9833649 100644
--- a/flang/include/flang/Optimizer/Support/Utils.h
+++ b/flang/include/flang/Optimizer/Support/Utils.h
@@ -224,34 +224,9 @@ mlir::Value computeElementDistance(mlir::Location loc,
 // We do this for arrays without a constant interior or arrays of character with
 // dynamic length arrays, since those are the only ones that get decayed to a
 // pointer to the element type.
-template <typename OP>
-inline mlir::Value
-genAllocationScaleSize(OP op, mlir::Type ity,
-                       mlir::ConversionPatternRewriter &rewriter) {
-  mlir::Location loc = op.getLoc();
-  mlir::Type dataTy = op.getInType();
-  auto seqTy = mlir::dyn_cast<fir::SequenceType>(dataTy);
-  fir::SequenceType::Extent constSize = 1;
-  if (seqTy) {
-    int constRows = seqTy.getConstantRows();
-    const fir::SequenceType::ShapeRef &shape = seqTy.getShape();
-    if (constRows != static_cast<int>(shape.size())) {
-      for (auto extent : shape) {
-        if (constRows-- > 0)
-          continue;
-        if (extent != fir::SequenceType::getUnknownExtent())
-          constSize *= extent;
-      }
-    }
-  }
-
-  if (constSize != 1) {
-    mlir::Value constVal{
-        fir::genConstantIndex(loc, ity, rewriter, constSize).getResult()};
-    return constVal;
-  }
-  return nullptr;
-}
+mlir::Value genAllocationScaleSize(mlir::Location loc, mlir::Type dataTy,
+                                   mlir::Type ity,
+                                   mlir::ConversionPatternRewriter &rewriter);
 
 /// Perform an extension or truncation as needed on an integer value. Lowering
 /// to the specific target may involve some sign-extending or truncation of
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 4cc1084aa56f8..50c1765b12409 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -261,7 +261,8 @@ struct AllocaOpConversion : public fir::FIROpConversion<fir::AllocaOp> {
                << scalarType << " with type parameters";
       }
     }
-    if (auto scaleSize = fir::genAllocationScaleSize(alloc, ity, rewriter))
+    if (auto scaleSize = fir::genAllocationScaleSize(
+            alloc.getLoc(), alloc.getInType(), ity, rewriter))
       size =
           rewriter.createOrFold<mlir::LLVM::MulOp>(loc, ity, size, scaleSize);
     if (alloc.hasShapeOperands()) {
@@ -1061,7 +1062,8 @@ struct AllocMemOpConversion : public fir::FIROpConversion<fir::AllocMemOp> {
     if (fir::isRecordWithTypeParameters(fir::unwrapSequenceType(dataTy)))
       TODO(loc, "fir.allocmem codegen of derived type with length parameters");
     mlir::Value size = genTypeSizeInBytes(loc, ity, rewriter, llvmObjectTy);
-    if (auto scaleSize = fir::genAllocationScaleSize(heap, ity, rewriter))
+    if (auto scaleSize =
+            fir::genAllocationScaleSize(loc, heap.getInType(), ity, rewriter))
       size = rewriter.create<mlir::LLVM::MulOp>(loc, ity, size, scaleSize);
     for (mlir::Value opnd : adaptor.getOperands())
       size = rewriter.create<mlir::LLVM::MulOp>(
@@ -4274,20 +4276,22 @@ void fir::populateFIRToLLVMConversionPatterns(
       BoxTypeCodeOpConversion, BoxTypeDescOpConversion, CallOpConversion,
       CmpcOpConversion, VolatileCastOpConversion, ConvertOpConversion,
       CoordinateOpConversion, CopyOpConversion, DTEntryOpConversion,
-      DeclareOpConversion, DivcOpConversion, EmboxOpConversion,
-      EmboxCharOpConversion, EmboxProcOpConversion, ExtractValueOpConversion,
-      FieldIndexOpConversion, FirEndOpConversion, FreeMemOpConversion,
-      GlobalLenOpConversion, GlobalOpConversion, InsertOnRangeOpConversion,
-      IsPresentOpConversion, LenParamIndexOpConversion, LoadOpConversion,
-      LocalitySpecifierOpConversion, MulcOpConversion, NegcOpConversion,
-      NoReassocOpConversion, SelectCaseOpConversion, SelectOpConversion,
-      SelectRankOpConversion, SelectTypeOpConversion, ShapeOpConversion,
-      ShapeShiftOpConversion, ShiftOpConversion, SliceOpConversion,
-      StoreOpConversion, StringLitOpConversion, SubcOpConversion,
-      TypeDescOpConversion, TypeInfoOpConversion, UnboxCharOpConversion,
-      UnboxProcOpConversion, UndefOpConversion, UnreachableOpConversion,
-      XArrayCoorOpConversion, XEmboxOpConversion, XReboxOpConversion,
-      ZeroOpConversion>(converter, options);
+      DeclareOpConversion,
+      DoConcurrentSpecifierOpConversion<fir::LocalitySpecifierOp>,
+      DoConcurrentSpecifierOpConversion<fir::DeclareReductionOp>,
+      DivcOpConversion, EmboxOpConversion, EmboxCharOpConversion,
+      EmboxProcOpConversion, ExtractValueOpConversion, FieldIndexOpConversion,
+      FirEndOpConversion, FreeMemOpConversion, GlobalLenOpConversion,
+      GlobalOpConversion, InsertOnRangeOpConversion, IsPresentOpConversion,
+      LenParamIndexOpConversion, LoadOpConversion, MulcOpConversion,
+      NegcOpConversion, NoReassocOpConversion, SelectCaseOpConversion,
+      SelectOpConversion, SelectRankOpConversion, SelectTypeOpConversion,
+      ShapeOpConversion, ShapeShiftOpConversion, ShiftOpConversion,
+      SliceOpConversion, StoreOpConversion, StringLitOpConversion,
+      SubcOpConversion, TypeDescOpConversion, TypeInfoOpConversion,
+      UnboxCharOpConversion, UnboxProcOpConversion, UndefOpConversion,
+      UnreachableOpConversion, XArrayCoorOpConversion, XEmboxOpConversion,
+      XReboxOpConversion, ZeroOpConversion>(converter, options);
 
   // Patterns that are populated without a type converter do not trigger
   // target materializations for the operands of the root op.
diff --git a/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp b/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp
index c8f9c2c154b95..b2c6b880c6f52 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp
@@ -145,7 +145,8 @@ struct TargetAllocMemOpConversion
                 "parameters");
     mlir::Value size = fir::computeElementDistance(
         loc, llvmObjectTy, ity, rewriter, lowerTy().getDataLayout());
-    if (auto scaleSize = fir::genAllocationScaleSize(allocmemOp, ity, rewriter))
+    if (auto scaleSize = fir::genAllocationScaleSize(
+            loc, allocmemOp.getInType(), ity, rewriter))
       size = rewriter.create<mlir::LLVM::MulOp>(loc, ity, size, scaleSize);
     for (mlir::Value opnd : adaptor.getOperands().drop_front())
       size = rewriter.create<mlir::LLVM::MulOp>(
diff --git a/flang/lib/Optimizer/Support/Utils.cpp b/flang/lib/Optimizer/Support/Utils.cpp
index dbd42285e8fae..6dc80ff8d18a6 100644
--- a/flang/lib/Optimizer/Support/Utils.cpp
+++ b/flang/lib/Optimizer/Support/Utils.cpp
@@ -77,6 +77,33 @@ fir::computeElementDistance(mlir::Location loc, mlir::Type llvmObjectType,
   return fir::genConstantIndex(loc, idxTy, rewriter, distance);
 }
 
+mlir::Value
+fir::genAllocationScaleSize(mlir::Location loc, mlir::Type dataTy,
+                            mlir::Type ity,
+                            mlir::ConversionPatternRewriter &rewriter) {
+  auto seqTy = mlir::dyn_cast<fir::SequenceType>(dataTy);
+  fir::SequenceType::Extent constSize = 1;
+  if (seqTy) {
+    int constRows = seqTy.getConstantRows();
+    const fir::SequenceType::ShapeRef &shape = seqTy.getShape();
+    if (constRows != static_cast<int>(shape.size())) {
+      for (auto extent : shape) {
+        if (constRows-- > 0)
+          continue;
+        if (extent != fir::SequenceType::getUnknownExtent())
+          constSize *= extent;
+      }
+    }
+  }
+
+  if (constSize != 1) {
+    mlir::Value constVal{
+        fir::genConstantIndex(loc, ity, rewriter, constSize).getResult()};
+    return constVal;
+  }
+  return nullptr;
+}
+
 mlir::Value fir::integerCast(const fir::LLVMTypeConverter &converter,
                              mlir::Location loc,
                              mlir::ConversionPatternRewriter &rewriter,
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index f81e8eac97234..57ddc41e4ed9b 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -2127,10 +2127,31 @@ def TargetAllocMemOp : OpenMP_Op<"target_allocmem",
     The memory is uninitialized after allocation. Operations must be paired with 
     `omp.target_freemem` to avoid memory leaks.
 
-    ```mlir
-      %device = arith.constant 0 : i32
-      %ptr = omp.target_allocmem %device : i32, vector<3x3xi32>
-    ```
+    * `$device`: The integer ID of the OpenMP device where the memory will be allocated.
+    * `$in_type`: The type of the object for which memory is being allocated. 
+      For arrays, this can be a static or dynamic array type.
+    * `$uniq_name`: An optional unique name for the allocated memory.
+    * `$bindc_name`: An optional name used for C interoperability.
+    * `$typeparams`: Runtime type parameters for polymorphic or parameterized types. 
+      These are typically integer values that define aspects of a type not fixed at compile time.
+    * `$shape`: Runtime shape operands for dynamic arrays. 
+      Each operand is an integer value representing the extent of a specific dimension. 
+
+  ```mlir
+    // Allocate a static 3x3 integer vector on device 0
+    %device_0 = arith.constant 0 : i32
+    %ptr_static = omp.target_allocmem %device_0 : i32, vector<3x3xi32>
+    // ... use %ptr_static ...
+    omp.target_freemem %device_0, %ptr_static : i32, i64
+
+    // Allocate a dynamic 2D Fortran array (fir.array) on device 1
+    %device_1 = arith.constant 1 : i32
+    %rows = arith.constant 10 : index
+    %cols = arith.constant 20 : index
+    %ptr_dynamic = omp.target_allocmem %device_1 : i32, !fir.array<?x?xf32>, %rows, %cols
+    // ... use %ptr_dynamic ...
+    omp.target_freemem %device_1, %ptr_dynamic : i32, i64
+  ```
   }];
 
   let arguments = (ins
@@ -2160,13 +2181,22 @@ def TargetFreeMemOp : OpenMP_Op<"target_freemem",
   let summary = "free memory on an openmp device";
 
   let description = [{
-    Deallocates memory on the specified OpenMP device that was previously 
-    allocated by an `omp.target_allocmem` operation. The memory is placed 
-    in an undefined state after deallocation.
-    ```
-      %device = arith.constant 0 : i32
-      %ptr = omp.target_allocmem %device : i32, vector<3x3xi32>
-      omp.target_freemem %device, %ptr : i32, i64
+    Deallocates memory on the specified OpenMP device that was previously
+    allocated by an `omp.target_allocmem` operation. After this operation, the
+    deallocated memory is in an undefined state and should not be accessed.
+    It is crucial to ensure that all accesses to the memory region are completed
+    before `omp.target_freemem` is called to avoid undefined behavior.
+
+    * `$device`: The integer ID of the OpenMP device from which the memory will be freed.
+    * `$heapref`: The integer value representing the device pointer to the memory
+      to be deallocated, which was previously returned by `omp.target_allocmem`.
+
+    ```mlir
+      // Example of allocating and freeing memory on an OpenMP device
+      %device_id = arith.constant 0 : i32
+      %allocated_ptr = omp.target_allocmem %device_id : i32, vector<3x3xi32>
+      // ... operations using %allocated_ptr on the device ...
+      omp.target_freemem %device_id, %allocated_ptr : i32, i64
     ```
   }];