[flang-commits] [flang] [mlir] [mlir][OpenMP] - MLIR to LLVMIR translation support for delayed privatization of allocatables in `omp.target` ops (PR #116576)

Kareem Ergawy via flang-commits flang-commits at lists.llvm.org
Thu Nov 21 02:44:42 PST 2024


https://github.com/ergawy updated https://github.com/llvm/llvm-project/pull/116576

>From 8d770fdaf36ef7c23d50f89bc9dfec6cb81d5aa9 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Mon, 18 Nov 2024 23:12:31 -0600
Subject: [PATCH 1/2] [mlir][OpenMP] Annotate `private` vars with `map_idx`
 when needed

This PR extends the MLIR representation for `omp.target` ops by adding a
`map_idx` to `private` vars. This annotation stores the index of the map
info operand corresponding to the private var. If the variable does not
have a map operand, the `map_idx` attribute is either not present at all
or its value is `-1`.
---
 .../OpenMP/MapsForPrivatizedSymbols.cpp       |  14 +-
 .../target-private-multiple-variables.f90     |   6 +-
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td |  27 +++-
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  | 135 +++++++++++++-----
 mlir/test/Dialect/OpenMP/ops.mlir             |  24 ++++
 5 files changed, 166 insertions(+), 40 deletions(-)

diff --git a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp
index 289e648eed8546..d2c814cc958ddf 100644
--- a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp
+++ b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp
@@ -1,5 +1,4 @@
-//===- MapsForPrivatizedSymbols.cpp
-//-----------------------------------------===//
+//===- MapsForPrivatizedSymbols.cpp ---------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -28,6 +27,7 @@
 #include "flang/Optimizer/Dialect/Support/KindMapping.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "flang/Optimizer/OpenMP/Passes.h"
+
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "mlir/IR/BuiltinAttributes.h"
@@ -124,6 +124,8 @@ class MapsForPrivatizedSymbolsPass
       if (targetOp.getPrivateVars().empty())
         return;
       OperandRange privVars = targetOp.getPrivateVars();
+      llvm::SmallVector<int64_t> privVarMapIdx;
+
       std::optional<ArrayAttr> privSyms = targetOp.getPrivateSyms();
       SmallVector<omp::MapInfoOp, 4> mapInfoOps;
       for (auto [privVar, privSym] : llvm::zip_equal(privVars, *privSyms)) {
@@ -133,17 +135,25 @@ class MapsForPrivatizedSymbolsPass
             SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(
                 targetOp, privatizerName);
         if (!privatizerNeedsMap(privatizer)) {
+          privVarMapIdx.push_back(-1);
           continue;
         }
+
+        privVarMapIdx.push_back(targetOp.getMapVars().size() +
+                                mapInfoOps.size());
+
         builder.setInsertionPoint(targetOp);
         Location loc = targetOp.getLoc();
         omp::MapInfoOp mapInfoOp = createMapInfo(loc, privVar, builder);
         mapInfoOps.push_back(mapInfoOp);
+
         LLVM_DEBUG(llvm::dbgs() << "MapsForPrivatizedSymbolsPass created ->\n");
         LLVM_DEBUG(mapInfoOp.dump());
       }
       if (!mapInfoOps.empty()) {
         mapInfoOpsForTarget.insert({targetOp.getOperation(), mapInfoOps});
+        targetOp.setPrivateMapsAttr(
+            mlir::DenseI64ArrayAttr::get(targetOp.getContext(), privVarMapIdx));
       }
     });
     if (!mapInfoOpsForTarget.empty()) {
diff --git a/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-multiple-variables.f90 b/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-multiple-variables.f90
index b0c76ff3845f83..f3f9bbe4a76a28 100644
--- a/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-multiple-variables.f90
+++ b/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-multiple-variables.f90
@@ -171,12 +171,12 @@ end subroutine target_allocatable
 ! CHECK_SAME        %[[CHAR_VAR_DESC_MAP]] -> %[[MAPPED_ARG3:.[^,]+]] :
 ! CHECK-SAME        !fir.ref<i32>, !fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<!fir.box<!fir.array<?xf32>>>, !fir.ref<!fir.boxchar<1>>)
 ! CHECK-SAME:     private(
-! CHECK-SAME:       @[[ALLOC_PRIVATIZER_SYM]] %{{[^[:space:]]+}}#0 -> %[[ALLOC_ARG:[^,]+]],
+! CHECK-SAME:       @[[ALLOC_PRIVATIZER_SYM]] %{{[^[:space:]]+}}#0 -> %[[ALLOC_ARG:[^,]+]] [map_idx=1],
 ! CHECK-SAME:       @[[REAL_PRIVATIZER_SYM]] %{{[^[:space:]]+}}#0 -> %[[REAL_ARG:[^,]+]],
 ! CHECK-SAME:       @[[LB_PRIVATIZER_SYM]] %{{[^[:space:]]+}}#0 -> %[[LB_ARG:[^,]+]],
-! CHECK-SAME:       @[[ARR_PRIVATIZER_SYM]] %{{[^[:space:]]+}}#0 -> %[[ARR_ARG:[^,]+]],
+! CHECK-SAME:       @[[ARR_PRIVATIZER_SYM]] %{{[^[:space:]]+}}#0 -> %[[ARR_ARG:[^,]+]] [map_idx=2],
 ! CHECK-SAME:       @[[COMP_PRIVATIZER_SYM]] %{{[^[:space:]]+}}#0 -> %[[COMP_ARG:[^,]+]],
-! CHECK-SAME:       @[[CHAR_PRIVATIZER_SYM]] %{{[^[:space:]]+}}#0 -> %[[CHAR_ARG:[^,]+]] :
+! CHECK-SAME:       @[[CHAR_PRIVATIZER_SYM]] %{{[^[:space:]]+}}#0 -> %[[CHAR_ARG:[^,]+]] [map_idx=3] :
 ! CHECK-SAME:       !fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<f32>, !fir.ref<i64>, !fir.box<!fir.array<?xf32>>, !fir.ref<complex<f32>>, !fir.boxchar<1>) {
 ! CHECK-NOT:      fir.alloca
 ! CHECK:          hlfir.declare %[[ALLOC_ARG]]
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 156e6eb371b85d..f6c7f19fffddf9 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -1225,21 +1225,46 @@ def TargetOp : OpenMP_Op<"target", traits = [
     The optional `if_expr` parameter specifies a boolean result of a conditional
     check. If this value is 1 or is not provided then the target region runs on
     a device, if it is 0 then the target region is executed on the host device.
+
+    The `private_maps` attribute connects `private` operands to their corresponding
+    `map` operands. For `private` operands that require a map, the value of the
+    corresponding element in the attribute is the index of the `map` operand
+    (relative to other `map` operands not the whole operands of the operation). For
+    `private` opernads that do not require a map, this value is -1 (which is omitted
+    from the assembly foramt printing).
   }] # clausesDescription;
 
+  let arguments = !con(clausesArgs,
+                       (ins OptionalAttr<DenseI64ArrayAttr>:$private_maps));
+
   let builders = [
     OpBuilder<(ins CArg<"const TargetOperands &">:$clauses)>
   ];
 
   let extraClassDeclaration = [{
     unsigned numMapBlockArgs() { return getMapVars().size(); }
+
+    mlir::Value getMappedValueForPrivateVar(unsigned privVarIdx) {
+      std::optional<DenseI64ArrayAttr> privateMapIdices = getPrivateMapsAttr();
+
+      if (!privateMapIdices.has_value())
+        return {};
+
+      int64_t mapInfoOpIdx = (*privateMapIdices)[privVarIdx];
+
+      if (mapInfoOpIdx == -1)
+        return {};
+
+      return getMapVars()[mapInfoOpIdx];
+    }
   }] # clausesExtraClassDeclaration;
 
   let assemblyFormat = clausesAssemblyFormat # [{
     custom<InReductionMapPrivateRegion>(
         $region, $in_reduction_vars, type($in_reduction_vars),
         $in_reduction_byref, $in_reduction_syms, $map_vars, type($map_vars),
-        $private_vars, type($private_vars), $private_syms) attr-dict
+        $private_vars, type($private_vars), $private_syms, $private_maps)
+        attr-dict
   }];
 
   let hasVerifier = 1;
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 94e71e089d4b18..8c5f79a49a334f 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -487,9 +487,11 @@ struct PrivateParseArgs {
   llvm::SmallVectorImpl<OpAsmParser::UnresolvedOperand> &vars;
   llvm::SmallVectorImpl<Type> &types;
   ArrayAttr &syms;
+  DenseI64ArrayAttr *mapIndices;
   PrivateParseArgs(SmallVectorImpl<OpAsmParser::UnresolvedOperand> &vars,
-                   SmallVectorImpl<Type> &types, ArrayAttr &syms)
-      : vars(vars), types(types), syms(syms) {}
+                   SmallVectorImpl<Type> &types, ArrayAttr &syms,
+                   DenseI64ArrayAttr *mapIndices = nullptr)
+      : vars(vars), types(types), syms(syms), mapIndices(mapIndices) {}
 };
 struct ReductionParseArgs {
   SmallVectorImpl<OpAsmParser::UnresolvedOperand> &vars;
@@ -517,8 +519,10 @@ static ParseResult parseClauseWithRegionArgs(
     SmallVectorImpl<OpAsmParser::UnresolvedOperand> &operands,
     SmallVectorImpl<Type> &types,
     SmallVectorImpl<OpAsmParser::Argument> &regionPrivateArgs,
-    ArrayAttr *symbols = nullptr, DenseBoolArrayAttr *byref = nullptr) {
+    ArrayAttr *symbols = nullptr, DenseI64ArrayAttr *mapIndices = nullptr,
+    DenseBoolArrayAttr *byref = nullptr) {
   SmallVector<SymbolRefAttr> symbolVec;
+  SmallVector<int64_t> mapIndicesVec;
   SmallVector<bool> isByRefVec;
   unsigned regionArgOffset = regionPrivateArgs.size();
 
@@ -538,6 +542,16 @@ static ParseResult parseClauseWithRegionArgs(
             parser.parseArgument(regionPrivateArgs.emplace_back()))
           return failure();
 
+        if (mapIndices) {
+          if (parser.parseOptionalLSquare().succeeded()) {
+            if (parser.parseKeyword("map_idx") || parser.parseEqual() ||
+                parser.parseInteger(mapIndicesVec.emplace_back()) ||
+                parser.parseRSquare())
+              return failure();
+          } else
+            mapIndicesVec.push_back(-1);
+        }
+
         return success();
       }))
     return failure();
@@ -571,6 +585,10 @@ static ParseResult parseClauseWithRegionArgs(
     *symbols = ArrayAttr::get(parser.getContext(), symbolAttrs);
   }
 
+  if (!mapIndicesVec.empty())
+    *mapIndices =
+        mlir::DenseI64ArrayAttr::get(parser.getContext(), mapIndicesVec);
+
   if (byref)
     *byref = makeDenseBoolArrayAttr(parser.getContext(), isByRefVec);
 
@@ -595,14 +613,14 @@ static ParseResult parseBlockArgClause(
 static ParseResult parseBlockArgClause(
     OpAsmParser &parser,
     llvm::SmallVectorImpl<OpAsmParser::Argument> &entryBlockArgs,
-    StringRef keyword, std::optional<PrivateParseArgs> reductionArgs) {
+    StringRef keyword, std::optional<PrivateParseArgs> privateArgs) {
   if (succeeded(parser.parseOptionalKeyword(keyword))) {
-    if (!reductionArgs)
+    if (!privateArgs)
       return failure();
 
-    if (failed(parseClauseWithRegionArgs(parser, reductionArgs->vars,
-                                         reductionArgs->types, entryBlockArgs,
-                                         &reductionArgs->syms)))
+    if (failed(parseClauseWithRegionArgs(
+            parser, privateArgs->vars, privateArgs->types, entryBlockArgs,
+            &privateArgs->syms, privateArgs->mapIndices)))
       return failure();
   }
   return success();
@@ -618,7 +636,8 @@ static ParseResult parseBlockArgClause(
 
     if (failed(parseClauseWithRegionArgs(
             parser, reductionArgs->vars, reductionArgs->types, entryBlockArgs,
-            &reductionArgs->syms, &reductionArgs->byref)))
+            &reductionArgs->syms, /*mapIndices=*/nullptr,
+            &reductionArgs->byref)))
       return failure();
   }
   return success();
@@ -674,12 +693,14 @@ static ParseResult parseInReductionMapPrivateRegion(
     SmallVectorImpl<OpAsmParser::UnresolvedOperand> &mapVars,
     SmallVectorImpl<Type> &mapTypes,
     llvm::SmallVectorImpl<OpAsmParser::UnresolvedOperand> &privateVars,
-    llvm::SmallVectorImpl<Type> &privateTypes, ArrayAttr &privateSyms) {
+    llvm::SmallVectorImpl<Type> &privateTypes, ArrayAttr &privateSyms,
+    DenseI64ArrayAttr &privateMaps) {
   AllRegionParseArgs args;
   args.inReductionArgs.emplace(inReductionVars, inReductionTypes,
                                inReductionByref, inReductionSyms);
   args.mapArgs.emplace(mapVars, mapTypes);
-  args.privateArgs.emplace(privateVars, privateTypes, privateSyms);
+  args.privateArgs.emplace(privateVars, privateTypes, privateSyms,
+                           &privateMaps);
   return parseBlockArgRegion(parser, region, args);
 }
 
@@ -776,8 +797,10 @@ struct PrivatePrintArgs {
   ValueRange vars;
   TypeRange types;
   ArrayAttr syms;
-  PrivatePrintArgs(ValueRange vars, TypeRange types, ArrayAttr syms)
-      : vars(vars), types(types), syms(syms) {}
+  DenseI64ArrayAttr mapIndices;
+  PrivatePrintArgs(ValueRange vars, TypeRange types, ArrayAttr syms,
+                   DenseI64ArrayAttr mapIndices)
+      : vars(vars), types(types), syms(syms), mapIndices(mapIndices) {}
 };
 struct ReductionPrintArgs {
   ValueRange vars;
@@ -804,6 +827,7 @@ static void printClauseWithRegionArgs(OpAsmPrinter &p, MLIRContext *ctx,
                                       ValueRange argsSubrange,
                                       ValueRange operands, TypeRange types,
                                       ArrayAttr symbols = nullptr,
+                                      DenseI64ArrayAttr mapIndices = nullptr,
                                       DenseBoolArrayAttr byref = nullptr) {
   if (argsSubrange.empty())
     return;
@@ -815,21 +839,31 @@ static void printClauseWithRegionArgs(OpAsmPrinter &p, MLIRContext *ctx,
     symbols = ArrayAttr::get(ctx, values);
   }
 
+  if (!mapIndices) {
+    llvm::SmallVector<int64_t> values(operands.size(), -1);
+    mapIndices = DenseI64ArrayAttr::get(ctx, values);
+  }
+
   if (!byref) {
     mlir::SmallVector<bool> values(operands.size(), false);
     byref = DenseBoolArrayAttr::get(ctx, values);
   }
 
-  llvm::interleaveComma(
-      llvm::zip_equal(operands, argsSubrange, symbols, byref.asArrayRef()), p,
-      [&p](auto t) {
-        auto [op, arg, sym, isByRef] = t;
-        if (isByRef)
-          p << "byref ";
-        if (sym)
-          p << sym << " ";
-        p << op << " -> " << arg;
-      });
+  llvm::interleaveComma(llvm::zip_equal(operands, argsSubrange, symbols,
+                                        mapIndices.asArrayRef(),
+                                        byref.asArrayRef()),
+                        p, [&p](auto t) {
+                          auto [op, arg, sym, map, isByRef] = t;
+                          if (isByRef)
+                            p << "byref ";
+                          if (sym)
+                            p << sym << " ";
+
+                          p << op << " -> " << arg;
+
+                          if (map != -1)
+                            p << " [map_idx=" << map << "]";
+                        });
   p << " : ";
   llvm::interleaveComma(types, p);
   p << ") ";
@@ -849,7 +883,7 @@ static void printBlockArgClause(OpAsmPrinter &p, MLIRContext *ctx,
   if (privateArgs)
     printClauseWithRegionArgs(p, ctx, clauseName, argsSubrange,
                               privateArgs->vars, privateArgs->types,
-                              privateArgs->syms);
+                              privateArgs->syms, privateArgs->mapIndices);
 }
 
 static void
@@ -859,7 +893,8 @@ printBlockArgClause(OpAsmPrinter &p, MLIRContext *ctx, StringRef clauseName,
   if (reductionArgs)
     printClauseWithRegionArgs(p, ctx, clauseName, argsSubrange,
                               reductionArgs->vars, reductionArgs->types,
-                              reductionArgs->syms, reductionArgs->byref);
+                              reductionArgs->syms, /*mapIndices=*/nullptr,
+                              reductionArgs->byref);
 }
 
 static void printBlockArgRegion(OpAsmPrinter &p, Operation *op, Region &region,
@@ -891,12 +926,13 @@ static void printInReductionMapPrivateRegion(
     OpAsmPrinter &p, Operation *op, Region &region, ValueRange inReductionVars,
     TypeRange inReductionTypes, DenseBoolArrayAttr inReductionByref,
     ArrayAttr inReductionSyms, ValueRange mapVars, TypeRange mapTypes,
-    ValueRange privateVars, TypeRange privateTypes, ArrayAttr privateSyms) {
+    ValueRange privateVars, TypeRange privateTypes, ArrayAttr privateSyms,
+    DenseI64ArrayAttr privateMaps) {
   AllRegionPrintArgs args;
   args.inReductionArgs.emplace(inReductionVars, inReductionTypes,
                                inReductionByref, inReductionSyms);
   args.mapArgs.emplace(mapVars, mapTypes);
-  args.privateArgs.emplace(privateVars, privateTypes, privateSyms);
+  args.privateArgs.emplace(privateVars, privateTypes, privateSyms, privateMaps);
   printBlockArgRegion(p, op, region, args);
 }
 
@@ -908,7 +944,8 @@ static void printInReductionPrivateRegion(
   AllRegionPrintArgs args;
   args.inReductionArgs.emplace(inReductionVars, inReductionTypes,
                                inReductionByref, inReductionSyms);
-  args.privateArgs.emplace(privateVars, privateTypes, privateSyms);
+  args.privateArgs.emplace(privateVars, privateTypes, privateSyms,
+                           /*mapIndices=*/nullptr);
   printBlockArgRegion(p, op, region, args);
 }
 
@@ -921,7 +958,8 @@ static void printInReductionPrivateReductionRegion(
   AllRegionPrintArgs args;
   args.inReductionArgs.emplace(inReductionVars, inReductionTypes,
                                inReductionByref, inReductionSyms);
-  args.privateArgs.emplace(privateVars, privateTypes, privateSyms);
+  args.privateArgs.emplace(privateVars, privateTypes, privateSyms,
+                           /*mapIndices=*/nullptr);
   args.reductionArgs.emplace(reductionVars, reductionTypes, reductionByref,
                              reductionSyms);
   printBlockArgRegion(p, op, region, args);
@@ -931,7 +969,8 @@ static void printPrivateRegion(OpAsmPrinter &p, Operation *op, Region &region,
                                ValueRange privateVars, TypeRange privateTypes,
                                ArrayAttr privateSyms) {
   AllRegionPrintArgs args;
-  args.privateArgs.emplace(privateVars, privateTypes, privateSyms);
+  args.privateArgs.emplace(privateVars, privateTypes, privateSyms,
+                           /*mapIndices=*/nullptr);
   printBlockArgRegion(p, op, region, args);
 }
 
@@ -941,7 +980,8 @@ static void printPrivateReductionRegion(
     TypeRange reductionTypes, DenseBoolArrayAttr reductionByref,
     ArrayAttr reductionSyms) {
   AllRegionPrintArgs args;
-  args.privateArgs.emplace(privateVars, privateTypes, privateSyms);
+  args.privateArgs.emplace(privateVars, privateTypes, privateSyms,
+                           /*mapIndices=*/nullptr);
   args.reductionArgs.emplace(reductionVars, reductionTypes, reductionByref,
                              reductionSyms);
   printBlockArgRegion(p, op, region, args);
@@ -1560,6 +1600,24 @@ static LogicalResult verifyMapClause(Operation *op, OperandRange mapVars) {
   return success();
 }
 
+static LogicalResult verifyPrivateVarsMapping(TargetOp targetOp) {
+  std::optional<DenseI64ArrayAttr> privateMapIndices =
+      targetOp.getPrivateMapsAttr();
+
+  // None of the private operands are mapped.
+  if (!privateMapIndices.has_value() || !privateMapIndices.value())
+    return success();
+
+  OperandRange privateVars = targetOp.getPrivateVars();
+
+  if (privateMapIndices.value().size() !=
+      static_cast<int64_t>(privateVars.size()))
+    return emitError(targetOp.getLoc(), "sizes of `private` operand range and "
+                                        "`private_maps` attribute mismatch");
+
+  return success();
+}
+
 //===----------------------------------------------------------------------===//
 // TargetDataOp
 //===----------------------------------------------------------------------===//
@@ -1656,14 +1714,23 @@ void TargetOp::build(OpBuilder &builder, OperationState &state,
                   /*in_reduction_vars=*/{}, /*in_reduction_byref=*/nullptr,
                   /*in_reduction_syms=*/nullptr, clauses.isDevicePtrVars,
                   clauses.mapVars, clauses.nowait, clauses.privateVars,
-                  makeArrayAttr(ctx, clauses.privateSyms), clauses.threadLimit);
+                  makeArrayAttr(ctx, clauses.privateSyms), clauses.threadLimit,
+                  /*private_maps=*/nullptr);
 }
 
 LogicalResult TargetOp::verify() {
   LogicalResult verifyDependVars =
       verifyDependVarList(*this, getDependKinds(), getDependVars());
-  return failed(verifyDependVars) ? verifyDependVars
-                                  : verifyMapClause(*this, getMapVars());
+
+  if (failed(verifyDependVars))
+    return verifyDependVars;
+
+  LogicalResult verifyMapVars = verifyMapClause(*this, getMapVars());
+
+  if (failed(verifyMapVars))
+    return verifyMapVars;
+
+  return verifyPrivateVarsMapping(*this);
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index c25a6ef4b4849b..94c63dd8e9aa0e 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -2750,6 +2750,30 @@ func.func @omp_target_private(%map1: memref<?xi32>, %map2: memref<?xi32>, %priv_
   return
 }
 
+// CHECK-LABEL: omp_target_private_with_map_idx
+func.func @omp_target_private_with_map_idx(%map1: memref<?xi32>, %map2: memref<?xi32>, %priv_var: !llvm.ptr) -> () {
+  %mapv1 = omp.map.info var_ptr(%map1 : memref<?xi32>, tensor<?xi32>) map_clauses(tofrom) capture(ByRef) -> memref<?xi32> {name = ""}
+  %mapv2 = omp.map.info var_ptr(%map2 : memref<?xi32>, tensor<?xi32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> memref<?xi32> {name = ""}
+
+  // CHECK: omp.target
+
+  // CHECK-SAME: map_entries(
+  // CHECK-SAME:   %{{[^[:space:]]+}} -> %[[MAP1_ARG:[^[:space:]]+]],
+  // CHECK-SAME:   %{{[^[:space:]]+}} -> %[[MAP2_ARG:[^[:space:]]+]]
+  // CHECK-SAME:   : memref<?xi32>, memref<?xi32>
+  // CHECK-SAME: )
+
+  // CHECK-SAME: private(
+  // CHECK-SAME:   @x.privatizer %{{[^[:space:]]+}} -> %[[PRIV_ARG:[^[:space:]]+]] [map_idx=1]
+  // CHECK-SAME:   : !llvm.ptr
+  // CHECK-SAME: )
+  omp.target map_entries(%mapv1 -> %arg0, %mapv2 -> %arg1 : memref<?xi32>, memref<?xi32>) private(@x.privatizer %priv_var -> %priv_arg [map_idx=1] : !llvm.ptr) {
+    omp.terminator
+  }
+
+  return
+}
+
 // CHECK-LABEL: omp_loop
 func.func @omp_loop(%lb : index, %ub : index, %step : index) {
   // CHECK: omp.loop {

>From 51cfb933f8ff80ec97933674bc9278c7f98f4f20 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Sun, 17 Nov 2024 23:22:20 -0600
Subject: [PATCH 2/2] [mlir][OpenMP] - MLIR to LLVMIR translation support for
 delayed privatization of allocatables in `omp.target` ops

This PR adds support to translate the `private` clause from MLIR to
LLVMIR when used on allocatables in the context of an `omp.target` op.
---
 .../OpenMP/MapsForPrivatizedSymbols.cpp       |   9 +-
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td |  10 ++
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 134 ++++++++++++++++--
 .../openmp-target-multiple-private.mlir       |  80 +++++++++++
 .../openmp-target-private-allocatable.mlir    | 108 ++++++++++++++
 .../Target/LLVMIR/openmp-target-private.mlir  |  89 ++++++++++++
 mlir/test/Target/LLVMIR/openmp-todo.mlir      |  18 ---
 7 files changed, 413 insertions(+), 35 deletions(-)
 create mode 100644 mlir/test/Target/LLVMIR/openmp-target-multiple-private.mlir
 create mode 100644 mlir/test/Target/LLVMIR/openmp-target-private-allocatable.mlir

diff --git a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp
index d2c814cc958ddf..c990bebcabde42 100644
--- a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp
+++ b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp
@@ -49,13 +49,6 @@ class MapsForPrivatizedSymbolsPass
     : public flangomp::impl::MapsForPrivatizedSymbolsPassBase<
           MapsForPrivatizedSymbolsPass> {
 
-  bool privatizerNeedsMap(omp::PrivateClauseOp &privatizer) {
-    Region &allocRegion = privatizer.getAllocRegion();
-    Value blockArg0 = allocRegion.getArgument(0);
-    if (blockArg0.use_empty())
-      return false;
-    return true;
-  }
   omp::MapInfoOp createMapInfo(Location loc, Value var,
                                fir::FirOpBuilder &builder) {
     uint64_t mapTypeTo = static_cast<
@@ -134,7 +127,7 @@ class MapsForPrivatizedSymbolsPass
         omp::PrivateClauseOp privatizer =
             SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(
                 targetOp, privatizerName);
-        if (!privatizerNeedsMap(privatizer)) {
+        if (!privatizer.needsMap()) {
           privVarMapIdx.push_back(-1);
           continue;
         }
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index f6c7f19fffddf9..477fcb6ff77663 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -135,6 +135,16 @@ def PrivateClauseOp : OpenMP_Op<"private", [IsolatedFromAbove, RecipeInterface]>
       auto &region = getDeallocRegion();
       return region.empty() ? nullptr : region.getArgument(0);
     }
+    /// privatizerNeedsMap returns true if the value being privatized in an
+    /// omp.target p should additionally be mapped to the target region
+    /// using a MapInfoOp. This is most common when an allocatable is privatized.
+    /// In such cases, the descriptor is use in privatization and needs to be
+    /// mapped on to the device.
+    bool needsMap() {
+      Value blockArg0 = getAllocRegion().getArgument(0);
+      return !blockArg0.use_empty();
+    }
+
   }];
 
   let hasRegionVerifier = 1;
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 4ee0222f9d6cf7..e043b0532d0209 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -305,10 +305,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
             if (privatizer.getDataSharingType() ==
                 omp::DataSharingClauseType::FirstPrivate)
               result = todo("firstprivate");
-
-            if (!privatizer.getDeallocRegion().empty())
-              result = op.emitError("not yet implemented: privatization of "
-                                    "structures in omp.target operation");
           }
         }
         checkThreadLimit(op, result);
@@ -3814,6 +3810,43 @@ createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg,
   return builder.saveIP();
 }
 
+/// Return the llvm::Value * corresponding to the `privateVar` that
+/// is being privatized. It isn't always as simple as looking up
+/// moduleTranslation with privateVar. For instance, in case of
+/// an allocatable, the descriptor for the allocatable is privatized.
+/// This descriptor is mapped using an MapInfoOp. So, this function
+/// will return a pointer to the llvm::Value corresponding to the
+/// block argument for the mapped descriptor.
+static llvm::Value *
+findHostAssociatedValue(Value privateVar, omp::TargetOp targetOp,
+                        llvm::DenseMap<Value, int> &mappedPrivateVars,
+                        llvm::IRBuilderBase &builder,
+                        LLVM::ModuleTranslation &moduleTranslation) {
+  if (mappedPrivateVars.contains(privateVar)) {
+    int blockArgIndex = mappedPrivateVars[privateVar];
+    Value blockArg = targetOp.getRegion().getArgument(blockArgIndex);
+    Type privVarType = privateVar.getType();
+    Type blockArgType = blockArg.getType();
+    assert(isa<LLVM::LLVMPointerType>(blockArgType) &&
+           "A block argument corresponding to a mapped var should have "
+           "!llvm.ptr type");
+
+    if (privVarType == blockArg.getType())
+      return moduleTranslation.lookupValue(blockArg);
+
+    if (!isa<LLVM::LLVMPointerType>(privVarType)) {
+      // This typically happens when the privatized type is lowered from
+      // boxchar<KIND> and gets lowered to !llvm.struct<(ptr, i64)>. That is the
+      // struct/pair is passed by value. But, mapped values are passed only as
+      // pointers, so before we privatize, we must load the pointer.
+      llvm::Value *load =
+          builder.CreateLoad(moduleTranslation.convertType(privVarType),
+                             moduleTranslation.lookupValue(blockArg));
+      return load;
+    }
+  }
+  return moduleTranslation.lookupValue(privateVar);
+}
 static LogicalResult
 convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
                  LLVM::ModuleTranslation &moduleTranslation) {
@@ -3825,6 +3858,19 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
   bool isTargetDevice = ompBuilder->Config.isTargetDevice();
   auto parentFn = opInst.getParentOfType<LLVM::LLVMFuncOp>();
   auto &targetRegion = targetOp.getRegion();
+  // Holds the private vars that have been mapped along with
+  // the block argument that corresponds to the MapInfoOp
+  // corresponding to the private var in question.
+  // So, for instance
+  //
+  // %10 = omp.map.info var_ptr(%6#0 : !fir.ref<!fir.box<!fir.heap<i32>>>, ..)
+  // omp.target map_entries(%10 -> %arg0) private(@box.privatizer %6#0-> %arg1)
+  //
+  // Then, %10 has been created so that the descriptor can be used by the
+  // privatizer
+  // @box.privatizer on the device side. Here we'd record {%6#0, 0} in the
+  // mappedPrivateVars map.
+  llvm::DenseMap<Value, int> mappedPrivateVars;
   DataLayout dl = DataLayout(opInst.getParentOfType<ModuleOp>());
   SmallVector<Value> mapVars = targetOp.getMapVars();
   ArrayRef<BlockArgument> mapBlockArgs =
@@ -3836,6 +3882,55 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
   bool isOffloadEntry =
       isTargetDevice || !ompBuilder->Config.TargetTriples.empty();
 
+  // For some private variables, the MapsForPrivatizedVariablesPass
+  // creates MapInfoOp instances. Go through the private variables and
+  // the mapped variables so that during codegeneration we are able
+  // to quickly look up the corresponding map variable, if any for each
+  // private variable.
+  if (!targetOp.getPrivateVars().empty() && !targetOp.getMapVars().empty()) {
+    auto argIface = llvm::cast<omp::BlockArgOpenMPOpInterface>(*targetOp);
+    OperandRange privateVars = targetOp.getPrivateVars();
+    std::optional<ArrayAttr> privateSyms = targetOp.getPrivateSyms();
+    std::optional<DenseI64ArrayAttr> privateMapIndices =
+        targetOp.getPrivateMapsAttr();
+
+    for (auto [privVarIdx, privVarSymPair] :
+         llvm::enumerate(llvm::zip_equal(privateVars, *privateSyms))) {
+      auto privVar = std::get<0>(privVarSymPair);
+      auto privSym = std::get<1>(privVarSymPair);
+
+      SymbolRefAttr privatizerName = llvm::cast<SymbolRefAttr>(privSym);
+      omp::PrivateClauseOp privatizer =
+          findPrivatizer(targetOp, privatizerName);
+
+      if (!privatizer.needsMap())
+        continue;
+
+      mlir::Value mappedValue =
+          targetOp.getMappedValueForPrivateVar(privVarIdx);
+      assert(mappedValue);
+
+      // The MapInfoOp defining the map var isn't really needed later.
+      // So, we don't store it in any datastructure. Instead, we just
+      // do some sanity checks on it right now.
+      auto mapInfoOp = mappedValue.getDefiningOp<omp::MapInfoOp>();
+      Type varType = mapInfoOp.getVarType();
+
+      // Check #1: Check that the type of the private variable matches
+      // the type of the variable being mapped.
+      if (!isa<LLVM::LLVMPointerType>(privVar.getType()))
+        assert(
+            varType == privVar.getType() &&
+            "Type of private var doesn't match the type of the mapped value");
+
+      // Ok, only 1 sanity check for now.
+      // Record the index of the block argument corresponding to this
+      // mapvar.
+      mappedPrivateVars.insert({privVar, argIface.getMapBlockArgsStart() +
+                                             (*privateMapIndices)[privVarIdx]});
+    }
+  }
+
   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
   auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP)
       -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
@@ -3862,9 +3957,10 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
           moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
       moduleTranslation.mapValue(arg, mapOpValue);
     }
-
     // Do privatization after moduleTranslation has already recorded
     // mapped values.
+    SmallVector<llvm::Value *> llvmPrivateVars;
+    SmallVector<Region *> privateCleanupRegions;
     if (!targetOp.getPrivateVars().empty()) {
       builder.restoreIP(allocaIP);
 
@@ -3880,11 +3976,13 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
         omp::PrivateClauseOp privatizer = findPrivatizer(&opInst, privSym);
         assert(privatizer.getDataSharingType() !=
                    omp::DataSharingClauseType::FirstPrivate &&
-               privatizer.getDeallocRegion().empty() &&
                "unsupported privatizer");
-        moduleTranslation.mapValue(privatizer.getAllocMoldArg(),
-                                   moduleTranslation.lookupValue(privVar));
         Region &allocRegion = privatizer.getAllocRegion();
+        BlockArgument allocRegionArg = allocRegion.getArgument(0);
+        moduleTranslation.mapValue(
+            allocRegionArg,
+            findHostAssociatedValue(privVar, targetOp, mappedPrivateVars,
+                                    builder, moduleTranslation));
         SmallVector<llvm::Value *, 1> yieldedValues;
         if (failed(inlineConvertOmpRegions(
                 allocRegion, "omp.targetop.privatizer", builder,
@@ -3893,7 +3991,12 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
               "failed to inline `alloc` region of `omp.private`");
         }
         assert(yieldedValues.size() == 1);
-        moduleTranslation.mapValue(privBlockArg, yieldedValues.front());
+        llvm::Value *llvmReplacementValue = yieldedValues.front();
+        moduleTranslation.mapValue(privBlockArg, llvmReplacementValue);
+        if (!privatizer.getDeallocRegion().empty()) {
+          llvmPrivateVars.push_back(llvmReplacementValue);
+          privateCleanupRegions.push_back(&privatizer.getDeallocRegion());
+        }
         moduleTranslation.forgetMapping(allocRegion);
         builder.restoreIP(builder.saveIP());
       }
@@ -3905,6 +4008,19 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
       return exitBlock.takeError();
 
     builder.SetInsertPoint(*exitBlock);
+    if (!llvmPrivateVars.empty()) {
+      assert(llvmPrivateVars.size() == privateCleanupRegions.size() &&
+             "Number of private variables needing cleanup not equal to number"
+             "of privatizers with dealloc regions");
+      if (failed(inlineOmpRegionCleanup(
+              privateCleanupRegions, llvmPrivateVars, moduleTranslation,
+              builder, "omp.targetop.private.cleanup",
+              /*shouldLoadCleanupRegionArg=*/false))) {
+        return llvm::createStringError(
+            "failed to inline `dealloc` region of `omp.private` "
+            "op in the target region");
+      }
+    }
     return builder.saveIP();
   };
 
diff --git a/mlir/test/Target/LLVMIR/openmp-target-multiple-private.mlir b/mlir/test/Target/LLVMIR/openmp-target-multiple-private.mlir
new file mode 100644
index 00000000000000..c632a0ee42f8a3
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-target-multiple-private.mlir
@@ -0,0 +1,80 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+llvm.func @dealloc_foo_0(!llvm.ptr)
+
+omp.private {type = private} @box.heap_privatizer0 : !llvm.ptr alloc {
+^bb0(%arg0: !llvm.ptr):
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %7 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>  : (i32) -> !llvm.ptr
+  omp.yield(%7 : !llvm.ptr)
+} dealloc {
+^bb0(%arg0: !llvm.ptr):
+  llvm.call @dealloc_foo_0(%arg0) : (!llvm.ptr) -> ()
+  omp.yield
+}
+
+llvm.func @alloc_foo_1(!llvm.ptr)
+llvm.func @dealloc_foo_1(!llvm.ptr)
+
+omp.private {type = private} @box.heap_privatizer1 : !llvm.ptr alloc {
+^bb0(%arg0: !llvm.ptr):
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %7 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>  : (i32) -> !llvm.ptr
+  llvm.call @alloc_foo_1(%arg0) : (!llvm.ptr) -> ()
+  omp.yield(%7 : !llvm.ptr)
+} dealloc {
+^bb0(%arg0: !llvm.ptr):
+  llvm.call @dealloc_foo_1(%arg0) : (!llvm.ptr) -> ()
+  omp.yield
+}
+
+llvm.func @target_allocatable_(%arg0: !llvm.ptr {fir.bindc_name = "lb"}, %arg1: !llvm.ptr {fir.bindc_name = "ub"}, %arg2: !llvm.ptr {fir.bindc_name = "l"}) attributes {fir.internal_name = "_QPtarget_allocatable"} {
+  %6 = llvm.mlir.constant(1 : i64) : i64
+  %7 = llvm.alloca %6 x i32 {bindc_name = "mapped_var"} : (i64) -> !llvm.ptr
+  %13 = llvm.alloca %6 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {bindc_name = "alloc_var0"} : (i64) -> !llvm.ptr
+  %14 = llvm.alloca %6 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {bindc_name = "alloc_var1"} : (i64) -> !llvm.ptr
+  %53 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "mapped_var"}
+  %54 = omp.map.info var_ptr(%13 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(to) capture(ByRef) -> !llvm.ptr
+  %55 = omp.map.info var_ptr(%14 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(to) capture(ByRef) -> !llvm.ptr
+  omp.target map_entries(%53 -> %arg3, %54 -> %arg4, %55 ->%arg5 : !llvm.ptr, !llvm.ptr, !llvm.ptr) private(@box.heap_privatizer0 %13 -> %arg6 [map_idx=1], @box.heap_privatizer1 %14 -> %arg7  [map_idx=2]: !llvm.ptr, !llvm.ptr) {
+    llvm.call @use_private_var0(%arg6) : (!llvm.ptr) -> ()
+    llvm.call @use_private_var1(%arg7) : (!llvm.ptr) -> ()
+    omp.terminator
+  }
+  llvm.return
+}
+
+
+llvm.func @use_private_var0(!llvm.ptr) -> ()
+llvm.func @use_private_var1(!llvm.ptr) -> ()
+
+// The first set of checks ensure that we are calling the offloaded function
+// with the right arguments, especially the second argument which needs to
+// be a memory reference to the descriptor for the privatized allocatable
+// CHECK: define void @target_allocatable_
+// CHECK-NOT: define internal void
+// CHECK: %[[DESC_ALLOC0:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, i64 1
+// CHECK: %[[DESC_ALLOC1:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, i64 1
+// CHECK: call void @__omp_offloading_[[OFFLOADED_FUNCTION:.*]](ptr {{[^,]+}},
+// CHECK-SAME: ptr %[[DESC_ALLOC0]], ptr %[[DESC_ALLOC1]])
+
+// CHECK: define internal void @__omp_offloading_[[OFFLOADED_FUNCTION]]
+// CHECK-SAME: (ptr {{[^,]+}}, ptr %[[DESCRIPTOR_ARG0:[^,]+]],
+// CHECK-SAME: ptr %[[DESCRIPTOR_ARG1:.*]]) {
+
+// `var0` privatrizer `alloc`
+// CHECK: %[[PRIV_DESC0:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }
+
+// `var1` privatrizer  `alloc`
+// CHECK: %[[PRIV_DESC1:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }
+// CHECK: call void @alloc_foo_1(ptr %[[DESCRIPTOR_ARG1]])
+
+// target op body
+// CHECK: call void @use_private_var0(ptr %[[PRIV_DESC0]]
+// CHECK: call void @use_private_var1(ptr %[[PRIV_DESC1]]
+
+// `var0` privatrizer `dealloc`
+// CHECK:  call void @dealloc_foo_0(ptr %[[PRIV_DESC0]])
+
+// `var1` privatrizer `dealloc`
+// CHECK:  call void @dealloc_foo_1(ptr %[[PRIV_DESC1]])
diff --git a/mlir/test/Target/LLVMIR/openmp-target-private-allocatable.mlir b/mlir/test/Target/LLVMIR/openmp-target-private-allocatable.mlir
new file mode 100644
index 00000000000000..7e2a26c922ead1
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-target-private-allocatable.mlir
@@ -0,0 +1,108 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+llvm.func @alloc_foo_1(!llvm.ptr)
+llvm.func @dealloc_foo_1(!llvm.ptr)
+
+omp.private {type = private} @box.heap_privatizer : !llvm.ptr alloc {
+^bb0(%arg0: !llvm.ptr):
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %7 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>  : (i32) -> !llvm.ptr
+  llvm.call @alloc_foo_1(%arg0) : (!llvm.ptr) -> ()
+  omp.yield(%7 : !llvm.ptr)
+} dealloc {
+^bb0(%arg0: !llvm.ptr):
+  llvm.call @dealloc_foo_1(%arg0) : (!llvm.ptr) -> ()
+  omp.yield
+}
+
+llvm.func @target_allocatable_(%arg0: !llvm.ptr {fir.bindc_name = "lb"}, %arg1: !llvm.ptr {fir.bindc_name = "ub"}, %arg2: !llvm.ptr {fir.bindc_name = "l"}) attributes {fir.internal_name = "_QPtarget_allocatable"} {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  %2 = llvm.mlir.constant(1 : i32) : i32
+  %3 = llvm.alloca %2 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  %4 = llvm.mlir.constant(1 : i64) : i64
+  %5 = llvm.alloca %4 x f32 {bindc_name = "real_var"} : (i64) -> !llvm.ptr
+  %6 = llvm.mlir.constant(1 : i64) : i64
+  %7 = llvm.alloca %6 x i32 {bindc_name = "mapped_var"} : (i64) -> !llvm.ptr
+  %8 = llvm.mlir.constant(1 : i64) : i64
+  %9 = llvm.alloca %8 x !llvm.struct<(f32, f32)> {bindc_name = "comp_var"} : (i64) -> !llvm.ptr
+  %10 = llvm.mlir.constant(1 : i32) : i32
+  %11 = llvm.alloca %10 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  %12 = llvm.mlir.constant(1 : i64) : i64
+  %13 = llvm.alloca %12 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {bindc_name = "alloc_var"} : (i64) -> !llvm.ptr
+  %14 = llvm.mlir.constant(0 : index) : i64
+  %15 = llvm.mlir.constant(1 : index) : i64
+  %16 = llvm.mlir.constant(0 : i64) : i64
+  %17 = llvm.mlir.zero : !llvm.ptr
+  %18 = llvm.mlir.constant(9 : i32) : i32
+  %19 = llvm.mlir.zero : !llvm.ptr
+  %20 = llvm.getelementptr %19[1] : (!llvm.ptr) -> !llvm.ptr, i32
+  %21 = llvm.ptrtoint %20 : !llvm.ptr to i64
+  %22 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+  %23 = llvm.insertvalue %21, %22[1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+  %24 = llvm.mlir.constant(20240719 : i32) : i32
+  %25 = llvm.insertvalue %24, %23[2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+  %26 = llvm.mlir.constant(0 : i32) : i32
+  %27 = llvm.trunc %26 : i32 to i8
+  %28 = llvm.insertvalue %27, %25[3] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+  %29 = llvm.trunc %18 : i32 to i8
+  %30 = llvm.insertvalue %29, %28[4] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+  %31 = llvm.mlir.constant(2 : i32) : i32
+  %32 = llvm.trunc %31 : i32 to i8
+  %33 = llvm.insertvalue %32, %30[5] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+  %34 = llvm.mlir.constant(0 : i32) : i32
+  %35 = llvm.trunc %34 : i32 to i8
+  %36 = llvm.insertvalue %35, %33[6] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+  %37 = llvm.insertvalue %17, %36[0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+  llvm.store %37, %11 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+  %38 = llvm.load %11 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+  llvm.store %38, %13 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+  %39 = llvm.load %arg2 : !llvm.ptr -> i64
+  %40 = llvm.icmp "sgt" %39, %16 : i64
+  %41 = llvm.select %40, %39, %16 : i1, i64
+  %42 = llvm.mlir.constant(1 : i64) : i64
+  %43 = llvm.alloca %41 x i8 {bindc_name = "char_var"} : (i64) -> !llvm.ptr
+  %44 = llvm.load %arg0 : !llvm.ptr -> i64
+  %45 = llvm.load %arg1 : !llvm.ptr -> i64
+  %46 = llvm.sub %45, %44 : i64
+  %47 = llvm.add %46, %15 : i64
+  %48 = llvm.icmp "sgt" %47, %14 : i64
+  %49 = llvm.select %48, %47, %14 : i1, i64
+  %50 = llvm.mlir.constant(1 : i64) : i64
+  %51 = llvm.mul %49, %50 : i64
+  %52 = llvm.alloca %51 x f32 {bindc_name = "real_arr"} : (i64) -> !llvm.ptr
+  %53 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "mapped_var"}
+  %54 = omp.map.info var_ptr(%13 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(to) capture(ByRef) -> !llvm.ptr
+  omp.target map_entries(%53 -> %arg3, %54 -> %arg4 : !llvm.ptr, !llvm.ptr) private(@box.heap_privatizer %13 -> %arg5 [map_idx=1] : !llvm.ptr) {
+    llvm.call @use_private_var(%arg5) : (!llvm.ptr) -> ()
+    omp.terminator
+  }
+  llvm.return
+}
+
+llvm.func @use_private_var(!llvm.ptr) -> ()
+
+llvm.func @_FortranAAssign(!llvm.ptr, !llvm.ptr, !llvm.ptr, i32) -> !llvm.struct<()> attributes {fir.runtime, sym_visibility = "private"}
+
+// The first set of checks ensure that we are calling the offloaded function
+// with the right arguments, especially the second argument which needs to
+// be a memory reference to the descriptor for the privatized allocatable
+// CHECK: define void @target_allocatable_
+// CHECK-NOT: define internal void
+// CHECK: %[[DESC_ALLOC:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, i64 1
+// CHECK: call void @__omp_offloading_[[OFFLOADED_FUNCTION:.*]](ptr {{[^,]+}},
+// CHECK-SAME: ptr %[[DESC_ALLOC]])
+
+// The second set of checks ensure that to allocate memory for the
+// allocatable, we are, in fact, using the memory reference of the descriptor
+// passed as the second argument to the offloaded function.
+// CHECK: define internal void @__omp_offloading_[[OFFLOADED_FUNCTION]]
+// CHECK-SAME: (ptr {{[^,]+}}, ptr %[[DESCRIPTOR_ARG:.*]]) {
+// CHECK: %[[DESC_TO_DEALLOC:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }
+// CHECK: call void @alloc_foo_1(ptr %[[DESCRIPTOR_ARG]])
+
+
+// CHECK: call void @use_private_var(ptr %[[DESC_TO_DEALLOC]]
+
+// Now, check the deallocation of the private var.
+// CHECK:  call void @dealloc_foo_1(ptr %[[DESC_TO_DEALLOC]])
diff --git a/mlir/test/Target/LLVMIR/openmp-target-private.mlir b/mlir/test/Target/LLVMIR/openmp-target-private.mlir
index e41b18f593efe8..40bccd324a75a8 100644
--- a/mlir/test/Target/LLVMIR/openmp-target-private.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-target-private.mlir
@@ -94,3 +94,92 @@ llvm.func @target_op_private_multi_block(%arg0: !llvm.ptr) {
 // CHECK: %[[PRIV_ALLOC:.*]] = alloca float, i32 %[[ONE]], align 4
 // CHECK: %[[PHI_ALLOCA:.*]]  = phi ptr [ %[[PRIV_ALLOC]], {{.*}} ]
 // CHECK: %[[RESULT:.*]] = load float, ptr %[[PHI_ALLOCA]], align 4
+
+// Descriptors are needed for CHARACTER arrays and their type is
+// !fir.boxchar<KIND>. When such arrays are used in the private construct, the
+// privatizer takes a !fir.boxchar<KIND> as input. This type is lowered to
+// !llvm.struct<(ptr, i64)>. This is unique because with other types of data,
+// typically, the privatizer funtion takes a !llvm.ptr. Now, on the host side,
+// we map the descriptor using the map clause of the omp.target  op. map clauses
+// take only !llvm.ptr types. This means, we have a case where the descriptor is
+// mapped by its pointer whereas the privatizer function expects the descriptor
+// by value. So, we have this test to ensure that the compiler correctly loads
+// from the mapped pointer before passing that to the privatizer function.
+omp.private {type = private} @_QFtarget_boxcharEchar_var_private_boxchar_c8xU : !llvm.struct<(ptr, i64)> alloc {
+^bb0(%arg0: !llvm.struct<(ptr, i64)>):
+  %0 = llvm.extractvalue %arg0[0] : !llvm.struct<(ptr, i64)>
+  %1 = llvm.extractvalue %arg0[1] : !llvm.struct<(ptr, i64)>
+  %2 = llvm.mlir.constant(1 : i64) : i64
+  %3 = llvm.alloca %1 x i8 {bindc_name = "char_var", pinned} : (i64) -> !llvm.ptr
+  %4 = llvm.mlir.undef : !llvm.struct<(ptr, i64)>
+  %5 = llvm.insertvalue %3, %4[0] : !llvm.struct<(ptr, i64)>
+  %6 = llvm.insertvalue %1, %5[1] : !llvm.struct<(ptr, i64)>
+  omp.yield(%6 : !llvm.struct<(ptr, i64)>)
+}
+llvm.func @target_boxchar_(%arg0: !llvm.ptr {fir.bindc_name = "l"}) attributes {fir.internal_name = "_QPtarget_boxchar"} {
+  %0 = llvm.mlir.constant(1 : i64) : i64
+  %1 = llvm.alloca %0 x i32 {bindc_name = "mapped_var"} : (i64) -> !llvm.ptr
+  %3 = llvm.alloca %0 x !llvm.struct<(ptr, i64)> : (i64) -> !llvm.ptr
+  %4 = llvm.mlir.constant(0 : i64) : i64
+  %5 = llvm.load %arg0 : !llvm.ptr -> i64
+  %6 = llvm.icmp "sgt" %5, %4 : i64
+  %7 = llvm.select %6, %5, %4 : i1, i64
+  %9 = llvm.alloca %7 x i8 {bindc_name = "char_var"} : (i64) -> !llvm.ptr
+  %10 = llvm.mlir.undef : !llvm.struct<(ptr, i64)>
+  %11 = llvm.insertvalue %9, %10[0] : !llvm.struct<(ptr, i64)>
+  %12 = llvm.insertvalue %7, %11[1] : !llvm.struct<(ptr, i64)>
+  %13 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "mapped_var"}
+  llvm.store %12, %3 : !llvm.struct<(ptr, i64)>, !llvm.ptr
+  %14 = omp.map.info var_ptr(%3 : !llvm.ptr, !llvm.struct<(ptr, i64)>) map_clauses(to) capture(ByRef) -> !llvm.ptr
+  omp.target map_entries(%13 -> %arg1, %14 -> %arg2 : !llvm.ptr, !llvm.ptr) private(@_QFtarget_boxcharEchar_var_private_boxchar_c8xU %12 -> %arg3 [map_idx=1] : !llvm.struct<(ptr, i64)>) {
+    %15 = llvm.mlir.constant(0 : index) : i64
+    %16 = llvm.mlir.constant(32 : i8) : i8
+    %17 = llvm.mlir.constant(1 : index) : i64
+    %18 = llvm.mlir.constant(false) : i1
+    %19 = llvm.mlir.constant(5 : index) : i64
+    %20 = llvm.mlir.constant(5 : i32) : i32
+    %21 = llvm.extractvalue %arg3[0] : !llvm.struct<(ptr, i64)>
+    %22 = llvm.extractvalue %arg3[1] : !llvm.struct<(ptr, i64)>
+    llvm.store %20, %arg1 : i32, !llvm.ptr
+    %23 = llvm.mlir.addressof @_QQclX68656C6C6F : !llvm.ptr
+    %24 = llvm.icmp "slt" %22, %19 : i64
+    %25 = llvm.select %24, %22, %19 : i1, i64
+    llvm.call @llvm.memmove.p0.p0.i64(%21, %23, %25, %18) : (!llvm.ptr, !llvm.ptr, i64, i1) -> ()
+    %26 = llvm.sub %22, %17 : i64
+    %27 = llvm.mlir.undef : !llvm.array<1 x i8>
+    %28 = llvm.insertvalue %16, %27[0] : !llvm.array<1 x i8>
+    %29 = llvm.sub %26, %25 : i64
+    %30 = llvm.add %29, %17 : i64
+    llvm.br ^bb1(%25, %30 : i64, i64)
+  ^bb1(%31: i64, %32: i64):  // 2 preds: ^bb0, ^bb2
+    %33 = llvm.icmp "sgt" %32, %15 : i64
+    llvm.cond_br %33, ^bb2, ^bb3
+  ^bb2:  // pred: ^bb1
+    %34 = llvm.getelementptr %21[%31] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<1 x i8>
+    llvm.store %28, %34 : !llvm.array<1 x i8>, !llvm.ptr
+    %35 = llvm.add %31, %17 : i64
+    %36 = llvm.sub %32, %17 : i64
+    llvm.br ^bb1(%35, %36 : i64, i64)
+  ^bb3:  // pred: ^bb1
+    omp.terminator
+  }
+  llvm.return
+}
+llvm.mlir.global linkonce constant @_QQclX68656C6C6F() comdat(@__llvm_comdat::@_QQclX68656C6C6F) {addr_space = 0 : i32} : !llvm.array<5 x i8> {
+  %0 = llvm.mlir.constant("hello") : !llvm.array<5 x i8>
+  llvm.return %0 : !llvm.array<5 x i8>
+}
+llvm.comdat @__llvm_comdat {
+  llvm.comdat_selector @_QQclX68656C6C6F any
+}
+llvm.func @llvm.memmove.p0.p0.i64(!llvm.ptr, !llvm.ptr, i64, i1) attributes {sym_visibility = "private"}
+
+
+
+// CHECK: define internal void @__omp_offloading_{{.*}}(ptr %{{[^,]+}}, ptr %[[MAPPED_ARG:.*]]) {
+// CHECK: %[[BOXCHAR:.*]] = load { ptr, i64 }, ptr %[[MAPPED_ARG]]
+// CHECK: %[[BOXCHAR_PTR:.*]] = extractvalue { ptr, i64 } %[[BOXCHAR]], 0
+// CHECK: %[[BOXCHAR_i64:.*]] = extractvalue { ptr, i64 } %[[BOXCHAR]], 1
+// CHECK: %[[MEM_ALLOC:.*]] = alloca i8, i64 %[[BOXCHAR_i64]]
+// CHECK: %[[PRIV_BOXCHAR0:.*]] = insertvalue { ptr, i64 } undef, ptr %[[MEM_ALLOC]], 0
+// CHECK: %[[PRIV_BOXCHAR1:.*]] = insertvalue { ptr, i64 } %[[PRIV_BOXCHAR0]], i64 %[[BOXCHAR_i64]], 1
diff --git a/mlir/test/Target/LLVMIR/openmp-todo.mlir b/mlir/test/Target/LLVMIR/openmp-todo.mlir
index f81d73fed5001a..ec7fadac1557c5 100644
--- a/mlir/test/Target/LLVMIR/openmp-todo.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-todo.mlir
@@ -346,24 +346,6 @@ llvm.func @target_firstprivate(%x : !llvm.ptr) {
 
 // -----
 
-omp.private {type = private} @x.privatizer : !llvm.ptr alloc {
-^bb0(%arg0: !llvm.ptr):
-  omp.yield(%arg0 : !llvm.ptr)
-} dealloc {
-^bb0(%arg0: !llvm.ptr):
-  omp.yield
-}
-llvm.func @target_struct_privatization(%x : !llvm.ptr) {
-  // expected-error at below {{not yet implemented: privatization of structures in omp.target operation}}
-  // expected-error at below {{LLVM Translation failed for operation: omp.target}}
-  omp.target private(@x.privatizer %x -> %arg0 : !llvm.ptr) {
-    omp.terminator
-  }
-  llvm.return
-}
-
-// -----
-
 llvm.func @target_thread_limit(%x : i32) {
   // expected-error at below {{not yet implemented: Unhandled clause thread_limit in omp.target operation}}
   // expected-error at below {{LLVM Translation failed for operation: omp.target}}



More information about the flang-commits mailing list