[Mlir-commits] [flang] [mlir] [mlir][OpenMP] - MLIR to LLVMIR translation support for delayed privatization of allocatables in `omp.target` ops (PR #113208)

Pranav Bhandarkar llvmlistbot at llvm.org
Mon Oct 21 12:38:49 PDT 2024


https://github.com/bhandarkar-pranav created https://github.com/llvm/llvm-project/pull/113208

This PR adds support to translate the `private` clause from MLIR to LLVMIR when used on allocatables in the context of an `omp.target` op.

>From ddb41ee6c0f6d6b2a832388c4e3b49b845f092c5 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Mon, 23 Sep 2024 16:59:12 -0500
Subject: [PATCH 1/6] fix failures seen in
 target-private-multiple-variables.f90

---
 flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp
index 2fa55844aec7c7..82402dd09e83d5 100644
--- a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp
+++ b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp
@@ -113,6 +113,11 @@ class MapsForPrivatizedSymbolsPass
     for (auto mapInfoOp : mapInfoOps)
       addMapInfoOp(targetOp, mapInfoOp);
   }
+  void addMapInfoOps(omp::TargetOp targetOp,
+                     llvm::SmallVectorImpl<omp::MapInfoOp> &mapInfoOps) {
+    for (auto mapInfoOp : mapInfoOps)
+      addMapInfoOp(targetOp, mapInfoOp);
+  }
   void runOnOperation() override {
     ModuleOp module = getOperation()->getParentOfType<ModuleOp>();
     fir::KindMapping kindMap = fir::getKindMapping(module);

>From 2821cdf8e9c4db04d55ad502012954e8958536ca Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Thu, 10 Oct 2024 14:08:57 -0500
Subject: [PATCH 2/6] Fix lit tests on the flang/maps_for_privatized_symbols
 pass

---
 flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp
index 82402dd09e83d5..d86dcd54597ff5 100644
--- a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp
+++ b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp
@@ -142,7 +142,9 @@ class MapsForPrivatizedSymbolsPass
         }
         builder.setInsertionPoint(targetOp);
         Location loc = targetOp.getLoc();
+        llvm::errs() << "Here\n";
         omp::MapInfoOp mapInfoOp = createMapInfo(loc, privVar, builder);
+        llvm::errs() << "Here again\n";
         mapInfoOps.push_back(mapInfoOp);
         LLVM_DEBUG(llvm::dbgs() << "MapsForPrivatizedSymbolsPass created ->\n");
         LLVM_DEBUG(mapInfoOp.dump());

>From 17ecdba5811fc7ecec8962528352b6555a4a19db Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Thu, 10 Oct 2024 14:43:38 -0500
Subject: [PATCH 3/6] Use BlockArgOpenMPOpInterface and remove some debug
 prints

---
 flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp
index d86dcd54597ff5..82402dd09e83d5 100644
--- a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp
+++ b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp
@@ -142,9 +142,7 @@ class MapsForPrivatizedSymbolsPass
         }
         builder.setInsertionPoint(targetOp);
         Location loc = targetOp.getLoc();
-        llvm::errs() << "Here\n";
         omp::MapInfoOp mapInfoOp = createMapInfo(loc, privVar, builder);
-        llvm::errs() << "Here again\n";
         mapInfoOps.push_back(mapInfoOp);
         LLVM_DEBUG(llvm::dbgs() << "MapsForPrivatizedSymbolsPass created ->\n");
         LLVM_DEBUG(mapInfoOp.dump());

>From 423fe88fde070c9d635460c649caef369677b18f Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Thu, 17 Oct 2024 00:52:09 -0500
Subject: [PATCH 4/6] Handle allocatables and add lit tests

---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 169 +++++++++++++-
 .../openmp-target-multiple-private.mlir       | 116 ++++++++++
 .../openmp-target-private-allocatable.mlir    | 207 ++++++++++++++++++
 .../Target/LLVMIR/openmp-target-private.mlir  |  90 ++++++++
 4 files changed, 574 insertions(+), 8 deletions(-)
 create mode 100644 mlir/test/Target/LLVMIR/openmp-target-multiple-private.mlir
 create mode 100644 mlir/test/Target/LLVMIR/openmp-target-private-allocatable.mlir

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 7c45e89cd8ac4b..83d07d10b4102a 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -41,6 +41,7 @@
 #include <optional>
 #include <utility>
 
+#define DEBUG_TYPE "openmp-to-llvm-ir-translation"
 using namespace mlir;
 
 namespace {
@@ -3316,7 +3317,56 @@ createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg,
 
   return builder.saveIP();
 }
+static bool privatizerNeedsMap(omp::PrivateClauseOp &privatizer) {
+  Region &allocRegion = privatizer.getAllocRegion();
+  Value blockArg0 = allocRegion.getArgument(0);
+  return !blockArg0.use_empty();
+}
+static llvm::Value *
+findHostAssociatedValue(Value privateVar, omp::TargetOp targetOp,
+                        llvm::DenseMap<Value, int> &mappedPrivateVars,
+                        llvm::IRBuilderBase &builder,
+                        LLVM::ModuleTranslation &moduleTranslation) {
+  if (mappedPrivateVars.contains(privateVar)) {
+    int blockArgIndex = mappedPrivateVars[privateVar];
+    Value blockArg = targetOp.getRegion().getArgument(blockArgIndex);
+    mlir::Type privVarType = privateVar.getType();
+    mlir::Type blockArgType = blockArg.getType();
+    assert(isa<LLVM::LLVMPointerType>(blockArgType) &&
+           "A block argument corresponding to a mapped var should have "
+           "!llvm.ptr type");
+
+    LLVM_DEBUG(llvm::dbgs() << "privVarType = ");
+    LLVM_DEBUG(privVarType.dump());
+    LLVM_DEBUG(llvm::dbgs() << "\n");
+    LLVM_DEBUG(llvm::dbgs() << "blockArgType = ");
+    LLVM_DEBUG(blockArg.getType().dump());
+    LLVM_DEBUG(llvm::dbgs() << "\n");
+
+    if (privVarType == blockArg.getType()) {
+      llvm::Value *v = moduleTranslation.lookupValue(blockArg);
+      LLVM_DEBUG(llvm::dbgs() << "Host Associated Value : ");
+      LLVM_DEBUG(v->dump());
+      return v;
+    }
 
+    if (!isa<LLVM::LLVMPointerType>(privVarType)) {
+      // This typically happens when the privatized type is lowered from
+      // boxchar<KIND> and gets lowered to !llvm.struct<(ptr, i64)>. That is the
+      // struct/pair is passed by value. But, mapped values are passed only as
+      // pointers, so before we privatize, we must load the pointer.
+      llvm::Value *load =
+          builder.CreateLoad(moduleTranslation.convertType(privVarType),
+                             moduleTranslation.lookupValue(blockArg));
+      LLVM_DEBUG(llvm::dbgs() << "Host Associated Value : ");
+      LLVM_DEBUG(load->dump());
+      return load;
+    }
+  }
+  LLVM_DEBUG(llvm::dbgs() << "Host Associated Value : ");
+  LLVM_DEBUG(moduleTranslation.lookupValue(privateVar)->dump());
+  return moduleTranslation.lookupValue(privateVar);
+}
 static LogicalResult
 convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
                  LLVM::ModuleTranslation &moduleTranslation) {
@@ -3329,6 +3379,19 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
   auto parentFn = opInst.getParentOfType<LLVM::LLVMFuncOp>();
   auto targetOp = cast<omp::TargetOp>(opInst);
   auto &targetRegion = targetOp.getRegion();
+  // Holds the private vars that have been mapped along with
+  // the block argument that corresponds to the MapInfoOp
+  // corresponding to the private var in question.
+  // So, for instance
+  //
+  // %10 = omp.map.info var_ptr(%6#0 : !fir.ref<!fir.box<!fir.heap<i32>>>, ..)
+  // omp.target map_entries(%10 -> %arg0) private(@box.privatizer %6#0-> %arg1)
+  //
+  // Then, %10 has been created so that the descriptor can be used by the
+  // privatizer
+  // @box.privatizer on the device side. Here we'd record {%6#0, 0} in the
+  // mappedPrivateVars map.
+  llvm::DenseMap<Value, int> mappedPrivateVars;
   DataLayout dl = DataLayout(opInst.getParentOfType<ModuleOp>());
   SmallVector<Value> mapVars = targetOp.getMapVars();
   ArrayRef<BlockArgument> mapBlockArgs =
@@ -3340,6 +3403,66 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
   bool isOffloadEntry =
       isTargetDevice || !ompBuilder->Config.TargetTriples.empty();
 
+  // For some private variables, the MapsForPrivatizedVariablesPass
+  // creates MapInfoOp instances. Go through the private variables and
+  // the mapped variables so that during codegeneration we are able
+  // to quickly look up the corresponding map variable, if any for each
+  // private variable.
+  if (!targetOp.getPrivateVars().empty() && !targetOp.getMapVars().empty()) {
+    auto argIface = llvm::cast<omp::BlockArgOpenMPOpInterface>(*targetOp);
+    unsigned lastMapBlockArgsIdx =
+        argIface.getMapBlockArgsStart() + argIface.numMapBlockArgs() - 1;
+    OperandRange privateVars = targetOp.getPrivateVars();
+    std::optional<ArrayAttr> privateSyms = targetOp.getPrivateSyms();
+    auto reverseIt = mapVars.rbegin();
+    LLVM_DEBUG(llvm::dbgs() << "****Private Vars*******\n");
+    for (auto [privVar, privSym] :
+         llvm::reverse(llvm::zip_equal(privateVars, *privateSyms))) {
+      LLVM_DEBUG(llvm::dbgs() << "-- {\n");
+      LLVM_DEBUG(privVar.dump());
+      LLVM_DEBUG(llvm::dbgs() << "Type:-> ");
+      LLVM_DEBUG(privVar.getType().dump());
+      SymbolRefAttr privatizerName = llvm::cast<SymbolRefAttr>(privSym);
+      LLVM_DEBUG(llvm::dbgs() << "Privatizer: " << privatizerName << "\n}");
+
+      omp::PrivateClauseOp privatizer =
+          SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(
+              targetOp, privatizerName);
+      if (!privatizerNeedsMap(privatizer)) {
+        LLVM_DEBUG(llvm::dbgs() << "    - DOES NOT need map\n");
+        continue;
+      }
+      LLVM_DEBUG(llvm::dbgs() << "      - NEEDS map");
+      LLVM_DEBUG(llvm::dbgs() << " -> Mapped Arg : -> ");
+      LLVM_DEBUG((*reverseIt).dump());
+      LLVM_DEBUG(llvm::dbgs() << " ->  Block Arg:  -> ");
+      LLVM_DEBUG(targetOp.getRegion().getArgument(lastMapBlockArgsIdx).dump());
+      LLVM_DEBUG(llvm::dbgs() << "\n");
+
+      // The MapInfoOp defining the map var isn't really needed later.
+      // We'll do some sanity checks on it right now.
+      omp::MapInfoOp mapInfoOp =
+          llvm::cast<omp::MapInfoOp>((*reverseIt).getDefiningOp());
+      Type varType = mapInfoOp.getVarType();
+      LLVM_DEBUG(llvm::dbgs() << "mapInfoOp.getVarType() = ");
+      LLVM_DEBUG(varType.dump());
+      LLVM_DEBUG(llvm::dbgs() << "\n");
+
+      if (!isa<LLVM::LLVMPointerType>(privVar.getType()))
+        assert(
+            varType == privVar.getType() &&
+            "Type of private var doesn't match the type of the mapped value");
+      mappedPrivateVars.insert({privVar, lastMapBlockArgsIdx});
+      lastMapBlockArgsIdx--;
+      reverseIt++;
+    }
+  }
+  for (auto [privVar, blockArgIdx] : mappedPrivateVars) {
+    LLVM_DEBUG(llvm::dbgs() << "*****Private->MappedVars****\n");
+    LLVM_DEBUG(privVar.dump());
+    LLVM_DEBUG(llvm::dbgs() << "->\n");
+    LLVM_DEBUG(targetRegion.getArgument(blockArgIdx).dump());
+  }
   LogicalResult bodyGenStatus = success();
   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
   auto bodyCB = [&](InsertPointTy allocaIP,
@@ -3365,11 +3488,21 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
       auto mapInfoOp = cast<omp::MapInfoOp>(mapOp.getDefiningOp());
       llvm::Value *mapOpValue =
           moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
+      LLVM_DEBUG(llvm::dbgs()
+                 << "ModuleTranslation Mapping Table for mapVars\n");
+      LLVM_DEBUG(arg.dump());
+      LLVM_DEBUG(llvm::dbgs() << "---->"; mapOpValue->dump());
+      LLVM_DEBUG(llvm::dbgs() << "\n");
       moduleTranslation.mapValue(arg, mapOpValue);
     }
-
+    LLVM_DEBUG(llvm::dbgs() << "LLVM Module before privatization\n");
+    LLVM_DEBUG(llvm::dbgs() << "------------------------------------\n");
+    LLVM_DEBUG(builder.GetInsertBlock()->getParent()->getParent()->dump());
+    LLVM_DEBUG(llvm::dbgs() << "------------------------------------\n");
     // Do privatization after moduleTranslation has already recorded
     // mapped values.
+    SmallVector<llvm::Value *> llvmPrivateVars;
+    SmallVector<Region *> privateCleanupRegions;
     if (!targetOp.getPrivateVars().empty()) {
       builder.restoreIP(allocaIP);
 
@@ -3384,16 +3517,18 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
         SymbolRefAttr privSym = cast<SymbolRefAttr>(privatizerNameAttr);
         omp::PrivateClauseOp privatizer = findPrivatizer(&opInst, privSym);
         if (privatizer.getDataSharingType() ==
-                omp::DataSharingClauseType::FirstPrivate ||
-            !privatizer.getDeallocRegion().empty()) {
+            omp::DataSharingClauseType::FirstPrivate) {
           opInst.emitError("Translation of omp.target from MLIR to LLVMIR "
-                           "failed because translation of firstprivate and "
-                           " private allocatables is not supported yet");
+                           "failed because translation of firstprivate is not "
+                           "supported yet");
           bodyGenStatus = failure();
         } else {
-          moduleTranslation.mapValue(privatizer.getAllocMoldArg(),
-                                     moduleTranslation.lookupValue(privVar));
           Region &allocRegion = privatizer.getAllocRegion();
+          BlockArgument allocRegionArg = allocRegion.getArgument(0);
+          moduleTranslation.mapValue(
+              allocRegionArg,
+              findHostAssociatedValue(privVar, targetOp, mappedPrivateVars,
+                                      builder, moduleTranslation));
           SmallVector<llvm::Value *, 1> yieldedValues;
           if (failed(inlineConvertOmpRegions(
                   allocRegion, "omp.targetop.privatizer", builder,
@@ -3404,7 +3539,12 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
             bodyGenStatus = failure();
           } else {
             assert(yieldedValues.size() == 1);
-            moduleTranslation.mapValue(privBlockArg, yieldedValues.front());
+            llvm::Value *llvmReplacementValue = yieldedValues.front();
+            moduleTranslation.mapValue(privBlockArg, llvmReplacementValue);
+            if (!privatizer.getDeallocRegion().empty()) {
+              llvmPrivateVars.push_back(llvmReplacementValue);
+              privateCleanupRegions.push_back(&privatizer.getDeallocRegion());
+            }
           }
           moduleTranslation.forgetMapping(allocRegion);
           builder.restoreIP(builder.saveIP());
@@ -3414,6 +3554,19 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
     llvm::BasicBlock *exitBlock = convertOmpOpRegions(
         targetRegion, "omp.target", builder, moduleTranslation, bodyGenStatus);
     builder.SetInsertPoint(exitBlock);
+    if (!llvmPrivateVars.empty()) {
+      assert(llvmPrivateVars.size() == privateCleanupRegions.size() &&
+             "Number of private variables needing cleanup not equal to number"
+             "of privatizers with dealloc regions");
+      if (failed(inlineOmpRegionCleanup(
+              privateCleanupRegions, llvmPrivateVars, moduleTranslation,
+              builder, "omp.targetop.private.cleanup",
+              /*shouldLoadCleanupRegionArg=*/false))) {
+        opInst.emitError("failed to inline `dealloc` region of `omp.private` "
+                         "op in the target region");
+        bodyGenStatus = failure();
+      }
+    }
     return builder.saveIP();
   };
 
diff --git a/mlir/test/Target/LLVMIR/openmp-target-multiple-private.mlir b/mlir/test/Target/LLVMIR/openmp-target-multiple-private.mlir
new file mode 100644
index 00000000000000..2491747152895c
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-target-multiple-private.mlir
@@ -0,0 +1,116 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+llvm.func @free(!llvm.ptr)
+llvm.func @malloc(i64) -> !llvm.ptr
+omp.private {type = private} @box.heap_privatizer0 : !llvm.ptr alloc {
+^bb0(%arg0: !llvm.ptr):
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %10 = llvm.getelementptr %arg0[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+  %1 = llvm.load %10 : !llvm.ptr -> i64
+  %7 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>  : (i32) -> !llvm.ptr
+  %17 = llvm.call @malloc(%1) {fir.must_be_heap = true, in_type = i32} : (i64) -> !llvm.ptr
+  %22 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+  %37 = llvm.insertvalue %17, %22[0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> 
+  llvm.store %37, %7 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+  omp.yield(%7 : !llvm.ptr)
+} dealloc {
+^bb0(%arg0: !llvm.ptr):
+  %6 = llvm.mlir.constant(0 : i64) : i64
+  %8 = llvm.getelementptr %arg0[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+  %9 = llvm.load %8 : !llvm.ptr -> !llvm.ptr
+  llvm.call @free(%9) : (!llvm.ptr) -> ()
+  omp.yield
+}
+omp.private {type = private} @box.heap_privatizer1 : !llvm.ptr alloc {
+^bb0(%arg0: !llvm.ptr):
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %10 = llvm.getelementptr %arg0[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+  %1 = llvm.load %10 : !llvm.ptr -> i64
+  %7 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>  : (i32) -> !llvm.ptr
+  %17 = llvm.call @malloc(%1) {fir.must_be_heap = true, in_type = i32} : (i64) -> !llvm.ptr
+  %22 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+  %37 = llvm.insertvalue %17, %22[0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> 
+  llvm.store %37, %7 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+  omp.yield(%7 : !llvm.ptr)
+} dealloc {
+^bb0(%arg0: !llvm.ptr):
+  %6 = llvm.mlir.constant(0 : i64) : i64
+  %8 = llvm.getelementptr %arg0[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+  %9 = llvm.load %8 : !llvm.ptr -> !llvm.ptr
+  llvm.call @free(%9) : (!llvm.ptr) -> ()
+  omp.yield
+}
+llvm.func @target_allocatable_(%arg0: !llvm.ptr {fir.bindc_name = "lb"}, %arg1: !llvm.ptr {fir.bindc_name = "ub"}, %arg2: !llvm.ptr {fir.bindc_name = "l"}) attributes {fir.internal_name = "_QPtarget_allocatable"} {
+  %6 = llvm.mlir.constant(1 : i64) : i64
+  %7 = llvm.alloca %6 x i32 {bindc_name = "mapped_var"} : (i64) -> !llvm.ptr
+  %13 = llvm.alloca %6 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {bindc_name = "alloc_var0"} : (i64) -> !llvm.ptr
+  %14 = llvm.alloca %6 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {bindc_name = "alloc_var1"} : (i64) -> !llvm.ptr
+  %53 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "mapped_var"}
+  %54 = omp.map.info var_ptr(%13 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(to) capture(ByRef) -> !llvm.ptr
+  %55 = omp.map.info var_ptr(%14 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(to) capture(ByRef) -> !llvm.ptr
+  omp.target map_entries(%53 -> %arg3, %54 -> %arg4, %55 ->%arg5 : !llvm.ptr, !llvm.ptr, !llvm.ptr) private(@box.heap_privatizer0 %13 -> %arg6, @box.heap_privatizer1 %14 -> %arg7 : !llvm.ptr, !llvm.ptr) {
+    %64 = llvm.mlir.constant(1 : i32) : i32
+    %65 = llvm.alloca %64 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+    %67 = llvm.alloca %64 x i32 : (i32) -> !llvm.ptr
+    %66 = llvm.mlir.constant(19 : i32) : i32
+    %68 = llvm.mlir.constant(18 : i32) : i32
+    %69 = llvm.mlir.constant(10 : i32) : i32
+    %70 = llvm.mlir.constant(5 : i32) : i32
+    llvm.store %70, %arg3 : i32, !llvm.ptr
+    llvm.store %69, %67 : i32, !llvm.ptr
+    %75 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+    %90 = llvm.insertvalue %67, %75[0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> 
+    llvm.store %90, %65 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+    %91 = llvm.mlir.zero : !llvm.ptr
+    %92 = llvm.call @_FortranAAssign(%arg6, %65, %91, %68) : (!llvm.ptr, !llvm.ptr, !llvm.ptr, i32) -> !llvm.struct<()>
+    %93 = llvm.call @_FortranAAssign(%arg7, %65, %91, %66) : (!llvm.ptr, !llvm.ptr, !llvm.ptr, i32) -> !llvm.struct<()>
+    omp.terminator
+  }
+  llvm.return
+}
+
+
+llvm.func @_FortranAAssign(!llvm.ptr, !llvm.ptr, !llvm.ptr, i32) -> !llvm.struct<()> attributes {fir.runtime, sym_visibility = "private"}
+
+// The first set of checks ensure that we are calling the offloaded function
+// with the right arguments, especially the second argument which needs to
+// be a memory reference to the descriptor for the privatized allocatable
+// CHECK: define void @target_allocatable_
+// CHECK-NOT: define internal void
+// CHECK: %[[DESC_ALLOC0:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, i64 1
+// CHECK: %[[DESC_ALLOC1:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, i64 1
+// CHECK: call void @__omp_offloading_[[OFFLOADED_FUNCTION:.*]](ptr {{[^,]+}},
+// CHECK-SAME: ptr %[[DESC_ALLOC0]], ptr %[[DESC_ALLOC1]])
+
+// CHECK: define internal void @__omp_offloading_[[OFFLOADED_FUNCTION]]
+// CHECK-SAME: (ptr {{[^,]+}}, ptr %[[DESCRIPTOR_ARG0:[^,]+]],
+// CHECK-SAME: ptr %[[DESCRIPTOR_ARG1:.*]]) {
+// CHECK: %[[I0:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 },
+// CHECK-SAME: ptr %[[DESCRIPTOR_ARG0]], i32 0, i32 1
+// CHECK: %[[MALLOC_ARG0:.*]] = load i64, ptr %[[I0]]
+// CHECK: %[[PRIV_DESC0:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }
+// CHECK: %[[HEAP_PTR0:.*]] = call ptr @malloc(i64 %[[MALLOC_ARG0]])
+// CHECK:  %[[TMP0:.*]] = insertvalue { ptr, i64, i32, i8, i8, i8, i8 }
+// CHECK-SAME: undef, ptr %[[HEAP_PTR0]], 0
+// CHECK: store { ptr, i64, i32, i8, i8, i8, i8 } %[[TMP0]], ptr %[[PRIV_DESC0]]
+
+// CHECK: %[[I1:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 },
+// CHECK-SAME: ptr %[[DESCRIPTOR_ARG1]], i32 0, i32 1
+// CHECK: %[[MALLOC_ARG1:.*]] = load i64, ptr %[[I1]]
+// CHECK: %[[PRIV_DESC1:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }
+// CHECK: %[[HEAP_PTR1:.*]] = call ptr @malloc(i64 %[[MALLOC_ARG1]])
+// CHECK:  %[[TMP1:.*]] = insertvalue { ptr, i64, i32, i8, i8, i8, i8 }
+// CHECK-SAME: undef, ptr %[[HEAP_PTR1]], 0
+// CHECK: store { ptr, i64, i32, i8, i8, i8, i8 } %[[TMP1]], ptr %[[PRIV_DESC1]]
+
+// CHECK: call {} @_FortranAAssign(ptr %[[PRIV_DESC0]]
+// CHECK: call {} @_FortranAAssign(ptr %[[PRIV_DESC1]]
+
+// CHECK:  %[[PTR0:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 },
+// CHECK-SAME: ptr %[[PRIV_DESC0]], i32 0, i32 0
+// CHECK: %[[HEAP_MEMREF0:.*]] = load ptr, ptr %[[PTR0]]
+// CHECK:  call void @free(ptr %[[HEAP_MEMREF0]])
+// CHECK:  %[[PTR1:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 },
+// CHECK-SAME: ptr %[[PRIV_DESC1]], i32 0, i32 0
+// CHECK: %[[HEAP_MEMREF1:.*]] = load ptr, ptr %[[PTR1]]
+// CHECK:  call void @free(ptr %[[HEAP_MEMREF1]])
diff --git a/mlir/test/Target/LLVMIR/openmp-target-private-allocatable.mlir b/mlir/test/Target/LLVMIR/openmp-target-private-allocatable.mlir
new file mode 100644
index 00000000000000..6d65b25a7fbc01
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-target-private-allocatable.mlir
@@ -0,0 +1,207 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+llvm.func @free(!llvm.ptr)
+llvm.func @malloc(i64) -> !llvm.ptr
+omp.private {type = private} @box.heap_privatizer : !llvm.ptr alloc {
+^bb0(%arg0: !llvm.ptr):
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %7 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {bindc_name = "alloc_var", pinned} : (i32) -> !llvm.ptr
+  %8 = llvm.mlir.constant(0 : i64) : i64
+  %10 = llvm.getelementptr %arg0[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+  %11 = llvm.load %10 : !llvm.ptr -> !llvm.ptr
+  %12 = llvm.ptrtoint %11 : !llvm.ptr to i64
+  %13 = llvm.icmp "ne" %12, %8 : i64
+  llvm.cond_br %13, ^bb1, ^bb2
+^bb1:  // pred: ^bb0
+  %14 = llvm.mlir.zero : !llvm.ptr
+  %16 = llvm.ptrtoint %14 : !llvm.ptr to i64
+  %17 = llvm.call @malloc(%16) {fir.must_be_heap = true, in_type = i32} : (i64) -> !llvm.ptr
+  %22 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+  %37 = llvm.insertvalue %17, %22[0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> 
+  llvm.store %37, %7 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+  llvm.br ^bb3
+^bb2:  // pred: ^bb0
+  %39 = llvm.mlir.zero : !llvm.ptr
+  %44 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+  %59 = llvm.insertvalue %39, %44[0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> 
+  llvm.store %59, %7 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+  llvm.br ^bb3
+^bb3:  // 2 preds: ^bb1, ^bb2
+  omp.yield(%7 : !llvm.ptr)
+} dealloc {
+^bb0(%arg0: !llvm.ptr):
+  %6 = llvm.mlir.constant(0 : i64) : i64
+  %8 = llvm.getelementptr %arg0[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+  %9 = llvm.load %8 : !llvm.ptr -> !llvm.ptr
+  %10 = llvm.ptrtoint %9 : !llvm.ptr to i64
+  %11 = llvm.icmp "ne" %10, %6 : i64
+  llvm.cond_br %11, ^bb1, ^bb2
+^bb1:  // pred: ^bb0
+  llvm.call @free(%9) : (!llvm.ptr) -> ()
+  %15 = llvm.mlir.zero : !llvm.ptr
+  %16 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+  %17 = llvm.insertvalue %15, %16[0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> 
+  llvm.store %17, %arg0 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+  llvm.br ^bb2
+^bb2:  // 2 preds: ^bb0, ^bb1
+  omp.yield
+}
+llvm.func @target_allocatable_(%arg0: !llvm.ptr {fir.bindc_name = "lb"}, %arg1: !llvm.ptr {fir.bindc_name = "ub"}, %arg2: !llvm.ptr {fir.bindc_name = "l"}) attributes {fir.internal_name = "_QPtarget_allocatable"} {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  %2 = llvm.mlir.constant(1 : i32) : i32
+  %3 = llvm.alloca %2 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  %4 = llvm.mlir.constant(1 : i64) : i64
+  %5 = llvm.alloca %4 x f32 {bindc_name = "real_var"} : (i64) -> !llvm.ptr
+  %6 = llvm.mlir.constant(1 : i64) : i64
+  %7 = llvm.alloca %6 x i32 {bindc_name = "mapped_var"} : (i64) -> !llvm.ptr
+  %8 = llvm.mlir.constant(1 : i64) : i64
+  %9 = llvm.alloca %8 x !llvm.struct<(f32, f32)> {bindc_name = "comp_var"} : (i64) -> !llvm.ptr
+  %10 = llvm.mlir.constant(1 : i32) : i32
+  %11 = llvm.alloca %10 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  %12 = llvm.mlir.constant(1 : i64) : i64
+  %13 = llvm.alloca %12 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {bindc_name = "alloc_var"} : (i64) -> !llvm.ptr
+  %14 = llvm.mlir.constant(0 : index) : i64
+  %15 = llvm.mlir.constant(1 : index) : i64
+  %16 = llvm.mlir.constant(0 : i64) : i64
+  %17 = llvm.mlir.zero : !llvm.ptr
+  %18 = llvm.mlir.constant(9 : i32) : i32
+  %19 = llvm.mlir.zero : !llvm.ptr
+  %20 = llvm.getelementptr %19[1] : (!llvm.ptr) -> !llvm.ptr, i32
+  %21 = llvm.ptrtoint %20 : !llvm.ptr to i64
+  %22 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+  %23 = llvm.insertvalue %21, %22[1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> 
+  %24 = llvm.mlir.constant(20240719 : i32) : i32
+  %25 = llvm.insertvalue %24, %23[2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> 
+  %26 = llvm.mlir.constant(0 : i32) : i32
+  %27 = llvm.trunc %26 : i32 to i8
+  %28 = llvm.insertvalue %27, %25[3] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> 
+  %29 = llvm.trunc %18 : i32 to i8
+  %30 = llvm.insertvalue %29, %28[4] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> 
+  %31 = llvm.mlir.constant(2 : i32) : i32
+  %32 = llvm.trunc %31 : i32 to i8
+  %33 = llvm.insertvalue %32, %30[5] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> 
+  %34 = llvm.mlir.constant(0 : i32) : i32
+  %35 = llvm.trunc %34 : i32 to i8
+  %36 = llvm.insertvalue %35, %33[6] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> 
+  %37 = llvm.insertvalue %17, %36[0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> 
+  llvm.store %37, %11 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+  %38 = llvm.load %11 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+  llvm.store %38, %13 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+  %39 = llvm.load %arg2 : !llvm.ptr -> i64
+  %40 = llvm.icmp "sgt" %39, %16 : i64
+  %41 = llvm.select %40, %39, %16 : i1, i64
+  %42 = llvm.mlir.constant(1 : i64) : i64
+  %43 = llvm.alloca %41 x i8 {bindc_name = "char_var"} : (i64) -> !llvm.ptr
+  %44 = llvm.load %arg0 : !llvm.ptr -> i64
+  %45 = llvm.load %arg1 : !llvm.ptr -> i64
+  %46 = llvm.sub %45, %44 : i64
+  %47 = llvm.add %46, %15 : i64
+  %48 = llvm.icmp "sgt" %47, %14 : i64
+  %49 = llvm.select %48, %47, %14 : i1, i64
+  %50 = llvm.mlir.constant(1 : i64) : i64
+  %51 = llvm.mul %49, %50 : i64
+  %52 = llvm.alloca %51 x f32 {bindc_name = "real_arr"} : (i64) -> !llvm.ptr
+  %53 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "mapped_var"}
+  %54 = omp.map.info var_ptr(%13 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(to) capture(ByRef) -> !llvm.ptr
+  omp.target map_entries(%53 -> %arg3, %54 -> %arg4 : !llvm.ptr, !llvm.ptr) private(@box.heap_privatizer %13 -> %arg5 : !llvm.ptr) {
+    %64 = llvm.mlir.constant(1 : i32) : i32
+    %65 = llvm.alloca %64 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+    %66 = llvm.mlir.constant(1 : i64) : i64
+    %67 = llvm.alloca %66 x i32 : (i64) -> !llvm.ptr
+    %68 = llvm.mlir.constant(18 : i32) : i32
+    %69 = llvm.mlir.constant(10 : i32) : i32
+    %70 = llvm.mlir.constant(5 : i32) : i32
+    llvm.store %70, %arg3 : i32, !llvm.ptr
+    llvm.store %69, %67 : i32, !llvm.ptr
+    %71 = llvm.mlir.constant(9 : i32) : i32
+    %72 = llvm.mlir.zero : !llvm.ptr
+    %73 = llvm.getelementptr %72[1] : (!llvm.ptr) -> !llvm.ptr, i32
+    %74 = llvm.ptrtoint %73 : !llvm.ptr to i64
+    %75 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+    %76 = llvm.insertvalue %74, %75[1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> 
+    %77 = llvm.mlir.constant(20240719 : i32) : i32
+    %78 = llvm.insertvalue %77, %76[2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> 
+    %79 = llvm.mlir.constant(0 : i32) : i32
+    %80 = llvm.trunc %79 : i32 to i8
+    %81 = llvm.insertvalue %80, %78[3] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> 
+    %82 = llvm.trunc %71 : i32 to i8
+    %83 = llvm.insertvalue %82, %81[4] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> 
+    %84 = llvm.mlir.constant(0 : i32) : i32
+    %85 = llvm.trunc %84 : i32 to i8
+    %86 = llvm.insertvalue %85, %83[5] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> 
+    %87 = llvm.mlir.constant(0 : i32) : i32
+    %88 = llvm.trunc %87 : i32 to i8
+    %89 = llvm.insertvalue %88, %86[6] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> 
+    %90 = llvm.insertvalue %67, %89[0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> 
+    llvm.store %90, %65 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+    %91 = llvm.mlir.zero : !llvm.ptr
+    %92 = llvm.call @_FortranAAssign(%arg5, %65, %91, %68) : (!llvm.ptr, !llvm.ptr, !llvm.ptr, i32) -> !llvm.struct<()>
+    omp.terminator
+  }
+  %55 = llvm.load %13 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+  llvm.store %55, %3 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+  %56 = llvm.getelementptr %3[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+  %57 = llvm.load %56 : !llvm.ptr -> !llvm.ptr
+  %58 = llvm.ptrtoint %57 : !llvm.ptr to i64
+  %59 = llvm.icmp "ne" %58, %16 : i64
+  llvm.cond_br %59, ^bb1, ^bb2
+^bb1:  // pred: ^bb0
+  %60 = llvm.load %13 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+  llvm.store %60, %1 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+  %61 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+  %62 = llvm.load %61 : !llvm.ptr -> !llvm.ptr
+  llvm.call @free(%62) : (!llvm.ptr) -> ()
+  %63 = llvm.load %11 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+  llvm.store %63, %13 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+  llvm.br ^bb2
+^bb2:  // 2 preds: ^bb0, ^bb1
+  llvm.return
+}
+
+
+llvm.func @_FortranAAssign(!llvm.ptr, !llvm.ptr, !llvm.ptr, i32) -> !llvm.struct<()> attributes {fir.runtime, sym_visibility = "private"}
+
+// The first set of checks ensure that we are calling the offloaded function
+// with the right arguments, especially the second argument which needs to
+// be a memory reference to the descriptor for the privatized allocatable
+// CHECK: define void @target_allocatable_
+// CHECK-NOT: define internal void
+// CHECK: %[[DESC_ALLOC:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, i64 1
+// CHECK: call void @__omp_offloading_[[OFFLOADED_FUNCTION:.*]](ptr {{[^,]+}},
+// CHECK-SAME: ptr %[[DESC_ALLOC]])
+
+// The second set of checks ensure that to allocate memory for the
+// allocatable, we are, in fact, using the memory reference of the descriptor
+// passed as the second argument to the offloaded function.
+// CHECK: define internal void @__omp_offloading_[[OFFLOADED_FUNCTION]]
+// CHECK-SAME: (ptr {{[^,]+}}, ptr %[[DESCRIPTOR_ARG:.*]]) {
+// CHECK: %[[HEAP_MEMREF:.*]] = call ptr @malloc(i64 0)
+// CHECK: %[[DESC_SETUP_MEMREF:.*]] = insertvalue
+// CHECK-SAME: { ptr, i64, i32, i8, i8, i8, i8 } undef, ptr %[[HEAP_MEMREF]], 0
+// Unfortunately, in the way the blocks are arranged, the store to the
+// privatized alloctables descriptor is encountered before we allocate
+// device-local memory for the descriptor (PRIVATE_DESC) itself
+// CHECK: store { ptr, i64, i32, i8, i8, i8, i8 } %[[DESC_SETUP_MEMREF]],
+// CHECK-SAME: ptr %[[PRIVATE_DESC:.*]], align 8
+// CHECK: %[[PRIVATE_DESC]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }
+// CHECK: %[[ORIG_DATA_PTR_MMBR_OF_DESC:.*]] = getelementptr 
+// CHECK-SAME: { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[DESCRIPTOR_ARG]],
+// CHECK-SAME: i32 0, i32 0
+// CHECK: %[[ORIG_DATA_PTR:.*]] = load ptr, ptr %[[ORIG_DATA_PTR_MMBR_OF_DESC]]
+// CHECK: %[[PTR_TO_INT:.*]] = ptrtoint ptr %[[ORIG_DATA_PTR]] to i64
+// CHECK: icmp ne i64 %[[PTR_TO_INT]], 0
+
+
+// CHECK: call {} @_FortranAAssign(ptr %[[DESC_TO_DEALLOC:[^,]+]],
+
+// Now, check the deallocation of the private var.
+// CHECK: call void @free(ptr %[[DATA_PTR_TO_FREE:.*]])
+// CHECK:   store { ptr, i64, i32, i8, i8, i8, i8 }
+// CHECK-SAME: { ptr null, i64 undef, i32 undef, i8 undef, i8 undef, i8 undef, i8 undef },
+// CHECK-SAME: ptr %[[DESC_TO_DEALLOC]]
+
+// CHECK: %[[DESC_MMBR:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 },
+// CHECK-SAME: ptr %[[DESC_TO_DEALLOC]], i32 0, i32 0
+// CHECK: %[[DATA_PTR_TO_FREE]] = load ptr, ptr %[[DESC_MMBR]]
+// CHECK: ptrtoint ptr %[[DATA_PTR_TO_FREE]] to i64
diff --git a/mlir/test/Target/LLVMIR/openmp-target-private.mlir b/mlir/test/Target/LLVMIR/openmp-target-private.mlir
index e41b18f593efe8..93bdf5e9ff5c99 100644
--- a/mlir/test/Target/LLVMIR/openmp-target-private.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-target-private.mlir
@@ -94,3 +94,93 @@ llvm.func @target_op_private_multi_block(%arg0: !llvm.ptr) {
 // CHECK: %[[PRIV_ALLOC:.*]] = alloca float, i32 %[[ONE]], align 4
 // CHECK: %[[PHI_ALLOCA:.*]]  = phi ptr [ %[[PRIV_ALLOC]], {{.*}} ]
 // CHECK: %[[RESULT:.*]] = load float, ptr %[[PHI_ALLOCA]], align 4
+
+// Descriptors are needed for CHARACTER arrays and their type is
+// !fir.boxchar<KIND>. When such arrays are used in the private construct, the
+// privatizer takes a !fir.boxchar<KIND> as input. This type is lowered to
+// !llvm.struct<(ptr, i64)>. This is unique because with other types of data,
+// typically, the privatizer funtion takes a !llvm.ptr. Now, on the host side,
+// we map the descriptor using the map clause of the omp.target  op. map clauses
+// take only !llvm.ptr types. This means, we have a case where the descriptor is
+// mapped by its pointer whereas the privatizer function expects the descriptor
+// by value. So, we have this test to ensure that the compiler correctly loads
+// from the mapped pointer before passing that to the privatizer function.
+omp.private {type = private} @_QFtarget_boxcharEchar_var_private_boxchar_c8xU : !llvm.struct<(ptr, i64)> alloc {
+^bb0(%arg0: !llvm.struct<(ptr, i64)>):
+  %0 = llvm.extractvalue %arg0[0] : !llvm.struct<(ptr, i64)>
+  %1 = llvm.extractvalue %arg0[1] : !llvm.struct<(ptr, i64)>
+  %2 = llvm.mlir.constant(1 : i64) : i64
+  %3 = llvm.alloca %1 x i8 {bindc_name = "char_var", pinned} : (i64) -> !llvm.ptr
+  %4 = llvm.mlir.undef : !llvm.struct<(ptr, i64)>
+  %5 = llvm.insertvalue %3, %4[0] : !llvm.struct<(ptr, i64)>
+  %6 = llvm.insertvalue %1, %5[1] : !llvm.struct<(ptr, i64)>
+  omp.yield(%6 : !llvm.struct<(ptr, i64)>)
+}
+llvm.func @target_boxchar_(%arg0: !llvm.ptr {fir.bindc_name = "l"}) attributes {fir.internal_name = "_QPtarget_boxchar"} {
+  %0 = llvm.mlir.constant(1 : i64) : i64
+  %1 = llvm.alloca %0 x i32 {bindc_name = "mapped_var"} : (i64) -> !llvm.ptr
+  %3 = llvm.alloca %0 x !llvm.struct<(ptr, i64)> : (i64) -> !llvm.ptr
+  %4 = llvm.mlir.constant(0 : i64) : i64
+  %5 = llvm.load %arg0 : !llvm.ptr -> i64
+  %6 = llvm.icmp "sgt" %5, %4 : i64
+  %7 = llvm.select %6, %5, %4 : i1, i64
+  %9 = llvm.alloca %7 x i8 {bindc_name = "char_var"} : (i64) -> !llvm.ptr
+  %10 = llvm.mlir.undef : !llvm.struct<(ptr, i64)>
+  %11 = llvm.insertvalue %9, %10[0] : !llvm.struct<(ptr, i64)>
+  %12 = llvm.insertvalue %7, %11[1] : !llvm.struct<(ptr, i64)>
+  %13 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "mapped_var"}
+  llvm.store %12, %3 : !llvm.struct<(ptr, i64)>, !llvm.ptr
+  %14 = omp.map.info var_ptr(%3 : !llvm.ptr, !llvm.struct<(ptr, i64)>) map_clauses(to) capture(ByRef) -> !llvm.ptr
+  omp.target map_entries(%13 -> %arg1, %14 -> %arg2 : !llvm.ptr, !llvm.ptr) private(@_QFtarget_boxcharEchar_var_private_boxchar_c8xU %12 -> %arg3 : !llvm.struct<(ptr, i64)>) {
+    %15 = llvm.mlir.constant(0 : index) : i64
+    %16 = llvm.mlir.constant(32 : i8) : i8
+    %17 = llvm.mlir.constant(1 : index) : i64
+    %18 = llvm.mlir.constant(false) : i1
+    %19 = llvm.mlir.constant(5 : index) : i64
+    %20 = llvm.mlir.constant(5 : i32) : i32
+    %21 = llvm.extractvalue %arg3[0] : !llvm.struct<(ptr, i64)>
+    %22 = llvm.extractvalue %arg3[1] : !llvm.struct<(ptr, i64)>
+    llvm.store %20, %arg1 : i32, !llvm.ptr
+    %23 = llvm.mlir.addressof @_QQclX68656C6C6F : !llvm.ptr
+    %24 = llvm.icmp "slt" %22, %19 : i64
+    %25 = llvm.select %24, %22, %19 : i1, i64
+    llvm.call @llvm.memmove.p0.p0.i64(%21, %23, %25, %18) : (!llvm.ptr, !llvm.ptr, i64, i1) -> ()
+    %26 = llvm.sub %22, %17 : i64
+    %27 = llvm.mlir.undef : !llvm.array<1 x i8>
+    %28 = llvm.insertvalue %16, %27[0] : !llvm.array<1 x i8>
+    %29 = llvm.sub %26, %25 : i64
+    %30 = llvm.add %29, %17 : i64
+    llvm.br ^bb1(%25, %30 : i64, i64)
+  ^bb1(%31: i64, %32: i64):  // 2 preds: ^bb0, ^bb2
+    %33 = llvm.icmp "sgt" %32, %15 : i64
+    llvm.cond_br %33, ^bb2, ^bb3
+  ^bb2:  // pred: ^bb1
+    %34 = llvm.getelementptr %21[%31] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<1 x i8>
+    llvm.store %28, %34 : !llvm.array<1 x i8>, !llvm.ptr
+    %35 = llvm.add %31, %17 : i64
+    %36 = llvm.sub %32, %17 : i64
+    llvm.br ^bb1(%35, %36 : i64, i64)
+  ^bb3:  // pred: ^bb1
+    omp.terminator
+  }
+  llvm.return
+}
+llvm.mlir.global linkonce constant @_QQclX68656C6C6F() comdat(@__llvm_comdat::@_QQclX68656C6C6F) {addr_space = 0 : i32} : !llvm.array<5 x i8> {
+  %0 = llvm.mlir.constant("hello") : !llvm.array<5 x i8>
+  llvm.return %0 : !llvm.array<5 x i8>
+}
+llvm.comdat @__llvm_comdat {
+  llvm.comdat_selector @_QQclX68656C6C6F any
+}
+llvm.func @llvm.memmove.p0.p0.i64(!llvm.ptr, !llvm.ptr, i64, i1) attributes {sym_visibility = "private"}
+
+
+
+// CHECK: define internal void @__omp_offloading_{{.*}}(ptr %{{[^,]+}}, ptr %[[MAPPED_ARG:.*]]) {
+// CHECK: %[[BOXCHAR:.*]] = load { ptr, i64 }, ptr %[[MAPPED_ARG]]
+// CHECK: %[[BOXCHAR_PTR:.*]] = extractvalue { ptr, i64 } %[[BOXCHAR]], 0
+// CHECK: %[[BOXCHAR_i64:.*]] = extractvalue { ptr, i64 } %[[BOXCHAR]], 1
+// CHECK: %[[MEM_ALLOC:.*]] = alloca i8, i64 %[[BOXCHAR_i64]]
+// CHECK: %[[PRIV_BOXCHAR0:.*]] = insertvalue { ptr, i64 } undef, ptr %[[MEM_ALLOC]], 0
+// CHECK: %[[PRIV_BOXCHAR1:.*]] = insertvalue { ptr, i64 } %[[PRIV_BOXCHAR0]], i64 %[[BOXCHAR_i64]], 1
+

>From 521cc4a2caff26d3ed265de47eea9a8b441c58e8 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Mon, 21 Oct 2024 11:41:16 -0500
Subject: [PATCH 5/6] Clean up a little bit

---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 69 +++++--------------
 1 file changed, 18 insertions(+), 51 deletions(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 83d07d10b4102a..2e013053a7d1c0 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -41,7 +41,6 @@
 #include <optional>
 #include <utility>
 
-#define DEBUG_TYPE "openmp-to-llvm-ir-translation"
 using namespace mlir;
 
 namespace {
@@ -3322,6 +3321,14 @@ static bool privatizerNeedsMap(omp::PrivateClauseOp &privatizer) {
   Value blockArg0 = allocRegion.getArgument(0);
   return !blockArg0.use_empty();
 }
+
+// Return the llvm::Value * corresponding to the privateVar that
+// is being privatized. It isn't always as simple as looking up
+// moduleTranslation with privateVar. For instance, in case of
+// an allocatable, the descriptor for the allocatable is privatized.
+// This descriptor is mapped using an MapInfoOp. So, this function
+// will return a pointer to the llvm::Value corresponding to the
+// block argument for the mapped descriptor.
 static llvm::Value *
 findHostAssociatedValue(Value privateVar, omp::TargetOp targetOp,
                         llvm::DenseMap<Value, int> &mappedPrivateVars,
@@ -3336,17 +3343,8 @@ findHostAssociatedValue(Value privateVar, omp::TargetOp targetOp,
            "A block argument corresponding to a mapped var should have "
            "!llvm.ptr type");
 
-    LLVM_DEBUG(llvm::dbgs() << "privVarType = ");
-    LLVM_DEBUG(privVarType.dump());
-    LLVM_DEBUG(llvm::dbgs() << "\n");
-    LLVM_DEBUG(llvm::dbgs() << "blockArgType = ");
-    LLVM_DEBUG(blockArg.getType().dump());
-    LLVM_DEBUG(llvm::dbgs() << "\n");
-
     if (privVarType == blockArg.getType()) {
       llvm::Value *v = moduleTranslation.lookupValue(blockArg);
-      LLVM_DEBUG(llvm::dbgs() << "Host Associated Value : ");
-      LLVM_DEBUG(v->dump());
       return v;
     }
 
@@ -3358,13 +3356,9 @@ findHostAssociatedValue(Value privateVar, omp::TargetOp targetOp,
       llvm::Value *load =
           builder.CreateLoad(moduleTranslation.convertType(privVarType),
                              moduleTranslation.lookupValue(blockArg));
-      LLVM_DEBUG(llvm::dbgs() << "Host Associated Value : ");
-      LLVM_DEBUG(load->dump());
       return load;
     }
   }
-  LLVM_DEBUG(llvm::dbgs() << "Host Associated Value : ");
-  LLVM_DEBUG(moduleTranslation.lookupValue(privateVar)->dump());
   return moduleTranslation.lookupValue(privateVar);
 }
 static LogicalResult
@@ -3415,54 +3409,36 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
     OperandRange privateVars = targetOp.getPrivateVars();
     std::optional<ArrayAttr> privateSyms = targetOp.getPrivateSyms();
     auto reverseIt = mapVars.rbegin();
-    LLVM_DEBUG(llvm::dbgs() << "****Private Vars*******\n");
     for (auto [privVar, privSym] :
          llvm::reverse(llvm::zip_equal(privateVars, *privateSyms))) {
-      LLVM_DEBUG(llvm::dbgs() << "-- {\n");
-      LLVM_DEBUG(privVar.dump());
-      LLVM_DEBUG(llvm::dbgs() << "Type:-> ");
-      LLVM_DEBUG(privVar.getType().dump());
       SymbolRefAttr privatizerName = llvm::cast<SymbolRefAttr>(privSym);
-      LLVM_DEBUG(llvm::dbgs() << "Privatizer: " << privatizerName << "\n}");
-
       omp::PrivateClauseOp privatizer =
-          SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(
-              targetOp, privatizerName);
-      if (!privatizerNeedsMap(privatizer)) {
-        LLVM_DEBUG(llvm::dbgs() << "    - DOES NOT need map\n");
+          findPrivatizer(targetOp, privatizerName);
+      if (!privatizerNeedsMap(privatizer))
         continue;
-      }
-      LLVM_DEBUG(llvm::dbgs() << "      - NEEDS map");
-      LLVM_DEBUG(llvm::dbgs() << " -> Mapped Arg : -> ");
-      LLVM_DEBUG((*reverseIt).dump());
-      LLVM_DEBUG(llvm::dbgs() << " ->  Block Arg:  -> ");
-      LLVM_DEBUG(targetOp.getRegion().getArgument(lastMapBlockArgsIdx).dump());
-      LLVM_DEBUG(llvm::dbgs() << "\n");
 
       // The MapInfoOp defining the map var isn't really needed later.
-      // We'll do some sanity checks on it right now.
+      // So, we don't store it in any datastructure. Instead, we just
+      // do some sanity checks on it right now.
       omp::MapInfoOp mapInfoOp =
           llvm::cast<omp::MapInfoOp>((*reverseIt).getDefiningOp());
       Type varType = mapInfoOp.getVarType();
-      LLVM_DEBUG(llvm::dbgs() << "mapInfoOp.getVarType() = ");
-      LLVM_DEBUG(varType.dump());
-      LLVM_DEBUG(llvm::dbgs() << "\n");
 
+      // Check #1: Check that the type of the private variable matches
+      // the type of the variable being mapped.
       if (!isa<LLVM::LLVMPointerType>(privVar.getType()))
         assert(
             varType == privVar.getType() &&
             "Type of private var doesn't match the type of the mapped value");
+
+      // Ok, only 1 sanity check for now.
+      // Record the index of the block argument corresponding to this
+      // mapvar.
       mappedPrivateVars.insert({privVar, lastMapBlockArgsIdx});
       lastMapBlockArgsIdx--;
       reverseIt++;
     }
   }
-  for (auto [privVar, blockArgIdx] : mappedPrivateVars) {
-    LLVM_DEBUG(llvm::dbgs() << "*****Private->MappedVars****\n");
-    LLVM_DEBUG(privVar.dump());
-    LLVM_DEBUG(llvm::dbgs() << "->\n");
-    LLVM_DEBUG(targetRegion.getArgument(blockArgIdx).dump());
-  }
   LogicalResult bodyGenStatus = success();
   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
   auto bodyCB = [&](InsertPointTy allocaIP,
@@ -3488,17 +3464,8 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
       auto mapInfoOp = cast<omp::MapInfoOp>(mapOp.getDefiningOp());
       llvm::Value *mapOpValue =
           moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
-      LLVM_DEBUG(llvm::dbgs()
-                 << "ModuleTranslation Mapping Table for mapVars\n");
-      LLVM_DEBUG(arg.dump());
-      LLVM_DEBUG(llvm::dbgs() << "---->"; mapOpValue->dump());
-      LLVM_DEBUG(llvm::dbgs() << "\n");
       moduleTranslation.mapValue(arg, mapOpValue);
     }
-    LLVM_DEBUG(llvm::dbgs() << "LLVM Module before privatization\n");
-    LLVM_DEBUG(llvm::dbgs() << "------------------------------------\n");
-    LLVM_DEBUG(builder.GetInsertBlock()->getParent()->getParent()->dump());
-    LLVM_DEBUG(llvm::dbgs() << "------------------------------------\n");
     // Do privatization after moduleTranslation has already recorded
     // mapped values.
     SmallVector<llvm::Value *> llvmPrivateVars;

>From 927909a88354809e01ddc3a55f99be4b45d94cb0 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Mon, 21 Oct 2024 13:21:40 -0500
Subject: [PATCH 6/6] Fix botched conflict resolution

---
 flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp
index 82402dd09e83d5..2fa55844aec7c7 100644
--- a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp
+++ b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp
@@ -113,11 +113,6 @@ class MapsForPrivatizedSymbolsPass
     for (auto mapInfoOp : mapInfoOps)
       addMapInfoOp(targetOp, mapInfoOp);
   }
-  void addMapInfoOps(omp::TargetOp targetOp,
-                     llvm::SmallVectorImpl<omp::MapInfoOp> &mapInfoOps) {
-    for (auto mapInfoOp : mapInfoOps)
-      addMapInfoOp(targetOp, mapInfoOp);
-  }
   void runOnOperation() override {
     ModuleOp module = getOperation()->getParentOfType<ModuleOp>();
     fir::KindMapping kindMap = fir::getKindMapping(module);



More information about the Mlir-commits mailing list