[flang-commits] [flang] [OpenMP]Update use_device_clause lowering (PR #101703)

Akash Banerjee via flang-commits flang-commits at lists.llvm.org
Tue Aug 13 05:55:42 PDT 2024


https://github.com/TIFitis updated https://github.com/llvm/llvm-project/pull/101703

>From be90dc54b1049da3f4272efc697462db66844943 Mon Sep 17 00:00:00 2001
From: Akash Banerjee <Akash.Banerjee at amd.com>
Date: Fri, 2 Aug 2024 17:01:27 +0100
Subject: [PATCH 1/3] [OpenMP]Update use_device_clause lowering

This patch updates the use_device_ptr and use_device_addr clauses to use the mapInfoOps for lowering. This allows all the types that are handle by the map clauses such as derived types to also be supported by the use_device_clauses.

This is patch 1/2 in a series of patches.

Co-authored-by: Raghu Maddhipatla raghu.maddhipatla at amd.com
---
 flang/lib/Lower/OpenMP/ClauseProcessor.cpp    | 126 ++++++++++++++++--
 flang/lib/Lower/OpenMP/ClauseProcessor.h      |  24 ++--
 flang/lib/Lower/OpenMP/OpenMP.cpp             |  87 ++++++++----
 .../Transforms/OMPMapInfoFinalization.cpp     |  37 +++--
 flang/test/Lower/OpenMP/target.f90            |  12 +-
 .../use-device-ptr-to-use-device-addr.f90     |  98 +++++++-------
 6 files changed, 276 insertions(+), 108 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index c30844f40b7e04..c8cda7f9b519db 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -1076,27 +1076,133 @@ bool ClauseProcessor::processEnter(
 }
 
 bool ClauseProcessor::processUseDeviceAddr(
+    Fortran::lower::StatementContext &stmtCtx,
     mlir::omp::UseDeviceAddrClauseOps &result,
     llvm::SmallVectorImpl<mlir::Type> &useDeviceTypes,
     llvm::SmallVectorImpl<mlir::Location> &useDeviceLocs,
-    llvm::SmallVectorImpl<const semantics::Symbol *> &useDeviceSyms) const {
-  return findRepeatableClause<omp::clause::UseDeviceAddr>(
-      [&](const omp::clause::UseDeviceAddr &clause, const parser::CharBlock &) {
-        addUseDeviceClause(converter, clause.v, result.useDeviceAddrVars,
-                           useDeviceTypes, useDeviceLocs, useDeviceSyms);
+    llvm::SmallVectorImpl<const Fortran::semantics::Symbol *> &useDeviceSyms)
+    const {
+  std::map<const Fortran::semantics::Symbol *,
+           llvm::SmallVector<OmpMapMemberIndicesData>>
+      parentMemberIndices;
+  bool clauseFound = findRepeatableClause<omp::clause::UseDeviceAddr>(
+      [&](const omp::clause::UseDeviceAddr &clause,
+          const Fortran::parser::CharBlock &) {
+        const Fortran::parser::CharBlock source;
+        mlir::Location location = converter.genLocation(source);
+        fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+        llvm::omp::OpenMPOffloadMappingFlags mapTypeBits =
+            llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO |
+            llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
+        for (const omp::Object &object : clause.v) {
+          llvm::SmallVector<mlir::Value> bounds;
+          std::stringstream asFortran;
+
+          Fortran::lower::AddrAndBoundsInfo info =
+              Fortran::lower::gatherDataOperandAddrAndBounds<
+                  mlir::omp::MapBoundsOp, mlir::omp::MapBoundsType>(
+                  converter, firOpBuilder, semaCtx, stmtCtx, *object.sym(),
+                  object.ref(), location, asFortran, bounds,
+                  treatIndexAsSection);
+
+          auto origSymbol = converter.getSymbolAddress(*object.sym());
+          mlir::Value symAddr = info.addr;
+          if (origSymbol && fir::isTypeWithDescriptor(origSymbol.getType()))
+            symAddr = origSymbol;
+
+          // Explicit map captures are captured ByRef by default,
+          // optimisation passes may alter this to ByCopy or other capture
+          // types to optimise
+          mlir::omp::MapInfoOp mapOp = createMapInfoOp(
+              firOpBuilder, location, symAddr,
+              /*varPtrPtr=*/mlir::Value{}, asFortran.str(), bounds,
+              /*members=*/{}, /*membersIndex=*/mlir::DenseIntElementsAttr{},
+              static_cast<
+                  std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
+                  mapTypeBits),
+              mlir::omp::VariableCaptureKind::ByRef, symAddr.getType());
+
+          if (object.sym()->owner().IsDerivedType()) {
+            addChildIndexAndMapToParent(object, parentMemberIndices, mapOp,
+                                        semaCtx);
+          } else {
+            useDeviceSyms.push_back(object.sym());
+            useDeviceTypes.push_back(symAddr.getType());
+            useDeviceLocs.push_back(symAddr.getLoc());
+            result.useDeviceAddrVars.push_back(mapOp);
+          }
+        }
       });
+
+  insertChildMapInfoIntoParent(converter, parentMemberIndices,
+                               result.useDeviceAddrVars, useDeviceSyms,
+                               &useDeviceTypes, &useDeviceLocs);
+  return clauseFound;
 }
 
 bool ClauseProcessor::processUseDevicePtr(
+    Fortran::lower::StatementContext &stmtCtx,
     mlir::omp::UseDevicePtrClauseOps &result,
     llvm::SmallVectorImpl<mlir::Type> &useDeviceTypes,
     llvm::SmallVectorImpl<mlir::Location> &useDeviceLocs,
-    llvm::SmallVectorImpl<const semantics::Symbol *> &useDeviceSyms) const {
-  return findRepeatableClause<omp::clause::UseDevicePtr>(
-      [&](const omp::clause::UseDevicePtr &clause, const parser::CharBlock &) {
-        addUseDeviceClause(converter, clause.v, result.useDevicePtrVars,
-                           useDeviceTypes, useDeviceLocs, useDeviceSyms);
+    llvm::SmallVectorImpl<const Fortran::semantics::Symbol *> &useDeviceSyms)
+    const {
+  std::map<const Fortran::semantics::Symbol *,
+           llvm::SmallVector<OmpMapMemberIndicesData>>
+      parentMemberIndices;
+  bool clauseFound = findRepeatableClause<omp::clause::UseDevicePtr>(
+      [&](const omp::clause::UseDevicePtr &clause,
+          const Fortran::parser::CharBlock &) {
+        const Fortran::parser::CharBlock source;
+        mlir::Location location = converter.genLocation(source);
+        fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+        llvm::omp::OpenMPOffloadMappingFlags mapTypeBits =
+            llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO |
+            llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
+        for (const omp::Object &object : clause.v) {
+          llvm::SmallVector<mlir::Value> bounds;
+          std::stringstream asFortran;
+
+          Fortran::lower::AddrAndBoundsInfo info =
+              Fortran::lower::gatherDataOperandAddrAndBounds<
+                  mlir::omp::MapBoundsOp, mlir::omp::MapBoundsType>(
+                  converter, firOpBuilder, semaCtx, stmtCtx, *object.sym(),
+                  object.ref(), location, asFortran, bounds,
+                  treatIndexAsSection);
+
+          auto origSymbol = converter.getSymbolAddress(*object.sym());
+          mlir::Value symAddr = info.addr;
+          if (origSymbol && fir::isTypeWithDescriptor(origSymbol.getType()))
+            symAddr = origSymbol;
+
+          // Explicit map captures are captured ByRef by default,
+          // optimisation passes may alter this to ByCopy or other capture
+          // types to optimise
+          mlir::omp::MapInfoOp mapOp = createMapInfoOp(
+              firOpBuilder, location, symAddr,
+              /*varPtrPtr=*/mlir::Value{}, asFortran.str(), bounds,
+              /*members=*/{}, /*membersIndex=*/mlir::DenseIntElementsAttr{},
+              static_cast<
+                  std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
+                  mapTypeBits),
+              mlir::omp::VariableCaptureKind::ByRef, symAddr.getType());
+
+          if (object.sym()->owner().IsDerivedType()) {
+            addChildIndexAndMapToParent(object, parentMemberIndices, mapOp,
+                                        semaCtx);
+          } else {
+            useDeviceSyms.push_back(object.sym());
+            useDeviceTypes.push_back(symAddr.getType());
+            useDeviceLocs.push_back(symAddr.getLoc());
+            result.useDevicePtrVars.push_back(mapOp);
+          }
+        }
       });
+
+  insertChildMapInfoIntoParent(converter, parentMemberIndices,
+                               result.useDevicePtrVars, useDeviceSyms,
+                               &useDeviceTypes, &useDeviceLocs);
+  return clauseFound;
 }
 
 } // namespace omp
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h
index 2c4b3857fda9f3..d33873516e9968 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.h
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h
@@ -128,16 +128,20 @@ class ClauseProcessor {
       llvm::SmallVectorImpl<const semantics::Symbol *> *reductionSyms =
           nullptr) const;
   bool processTo(llvm::SmallVectorImpl<DeclareTargetCapturePair> &result) const;
-  bool processUseDeviceAddr(
-      mlir::omp::UseDeviceAddrClauseOps &result,
-      llvm::SmallVectorImpl<mlir::Type> &useDeviceTypes,
-      llvm::SmallVectorImpl<mlir::Location> &useDeviceLocs,
-      llvm::SmallVectorImpl<const semantics::Symbol *> &useDeviceSyms) const;
-  bool processUseDevicePtr(
-      mlir::omp::UseDevicePtrClauseOps &result,
-      llvm::SmallVectorImpl<mlir::Type> &useDeviceTypes,
-      llvm::SmallVectorImpl<mlir::Location> &useDeviceLocs,
-      llvm::SmallVectorImpl<const semantics::Symbol *> &useDeviceSyms) const;
+  bool
+  processUseDeviceAddr(Fortran::lower::StatementContext &stmtCtx,
+                       mlir::omp::UseDeviceAddrClauseOps &result,
+                       llvm::SmallVectorImpl<mlir::Type> &useDeviceTypes,
+                       llvm::SmallVectorImpl<mlir::Location> &useDeviceLocs,
+                       llvm::SmallVectorImpl<const Fortran::semantics::Symbol *>
+                           &useDeviceSyms) const;
+  bool
+  processUseDevicePtr(Fortran::lower::StatementContext &stmtCtx,
+                      mlir::omp::UseDevicePtrClauseOps &result,
+                      llvm::SmallVectorImpl<mlir::Type> &useDeviceTypes,
+                      llvm::SmallVectorImpl<mlir::Location> &useDeviceLocs,
+                      llvm::SmallVectorImpl<const Fortran::semantics::Symbol *>
+                          &useDeviceSyms) const;
 
   template <typename T>
   bool processMotionClauses(lower::StatementContext &stmtCtx,
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 26825468df9b1d..34ca1de9bfea31 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -703,32 +703,73 @@ static void genBodyOfTargetDataOp(
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   mlir::Region &region = dataOp.getRegion();
 
-  firOpBuilder.createBlock(&region, {}, useDeviceTypes, useDeviceLocs);
+  auto *regionBlock =
+      firOpBuilder.createBlock(&region, {}, useDeviceTypes, useDeviceLocs);
+
+  // Clones the `bounds` placing them inside the target region and returns them.
+  auto cloneBound = [&](mlir::Value bound) {
+    if (mlir::isMemoryEffectFree(bound.getDefiningOp())) {
+      mlir::Operation *clonedOp = bound.getDefiningOp()->clone();
+      regionBlock->push_back(clonedOp);
+      return clonedOp->getResult(0);
+    }
+    TODO(converter.getCurrentLocation(),
+         "target map clause operand unsupported bound type");
+  };
+
+  auto cloneBounds = [cloneBound](llvm::ArrayRef<mlir::Value> bounds) {
+    llvm::SmallVector<mlir::Value> clonedBounds;
+    for (mlir::Value bound : bounds)
+      clonedBounds.emplace_back(cloneBound(bound));
+    return clonedBounds;
+  };
 
   for (auto [argIndex, argSymbol] : llvm::enumerate(useDeviceSymbols)) {
     const mlir::BlockArgument &arg = region.front().getArgument(argIndex);
     fir::ExtendedValue extVal = converter.getSymbolExtendedValue(*argSymbol);
-    if (auto refType = mlir::dyn_cast<fir::ReferenceType>(arg.getType())) {
-      if (fir::isa_builtin_cptr_type(refType.getElementType())) {
-        converter.bindSymbol(*argSymbol, arg);
-      } else {
-        // Avoid capture of a reference to a structured binding.
-        const semantics::Symbol *sym = argSymbol;
-        extVal.match(
-            [&](const fir::MutableBoxValue &mbv) {
-              converter.bindSymbol(
-                  *sym,
-                  fir::MutableBoxValue(
-                      arg, fir::factory::getNonDeferredLenParams(extVal), {}));
-            },
-            [&](const auto &) {
-              TODO(converter.getCurrentLocation(),
-                   "use_device clause operand unsupported type");
-            });
-      }
+    auto refType = mlir::dyn_cast<fir::ReferenceType>(arg.getType());
+    if (refType && fir::isa_builtin_cptr_type(refType.getElementType())) {
+      converter.bindSymbol(*argSymbol, arg);
     } else {
-      TODO(converter.getCurrentLocation(),
-           "use_device clause operand unsupported type");
+      // Avoid capture of a reference to a structured binding.
+      const Fortran::semantics::Symbol *sym = argSymbol;
+      // Structure component symbols don't have bindings.
+      if (sym->owner().IsDerivedType())
+        continue;
+      fir::ExtendedValue extVal = converter.getSymbolExtendedValue(*sym);
+      extVal.match(
+          [&](const fir::BoxValue &v) {
+            converter.bindSymbol(*sym,
+                                 fir::BoxValue(arg, cloneBounds(v.getLBounds()),
+                                               v.getExplicitParameters(),
+                                               v.getExplicitExtents()));
+          },
+          [&](const fir::MutableBoxValue &v) {
+            converter.bindSymbol(
+                *sym, fir::MutableBoxValue(arg, cloneBounds(v.getLBounds()),
+                                           v.getMutableProperties()));
+          },
+          [&](const fir::ArrayBoxValue &v) {
+            converter.bindSymbol(
+                *sym, fir::ArrayBoxValue(arg, cloneBounds(v.getExtents()),
+                                         cloneBounds(v.getLBounds()),
+                                         v.getSourceBox()));
+          },
+          [&](const fir::CharArrayBoxValue &v) {
+            converter.bindSymbol(
+                *sym, fir::CharArrayBoxValue(arg, cloneBound(v.getLen()),
+                                             cloneBounds(v.getExtents()),
+                                             cloneBounds(v.getLBounds())));
+          },
+          [&](const fir::CharBoxValue &v) {
+            converter.bindSymbol(
+                *sym, fir::CharBoxValue(arg, cloneBound(v.getLen())));
+          },
+          [&](const fir::UnboxedValue &v) { converter.bindSymbol(*sym, arg); },
+          [&](const auto &) {
+            TODO(converter.getCurrentLocation(),
+                 "target map clause operand unsupported type");
+          });
     }
   }
 
@@ -1193,9 +1234,9 @@ static void genTargetDataClauses(
   cp.processDevice(stmtCtx, clauseOps);
   cp.processIf(llvm::omp::Directive::OMPD_target_data, clauseOps);
   cp.processMap(loc, stmtCtx, clauseOps);
-  cp.processUseDeviceAddr(clauseOps, useDeviceTypes, useDeviceLocs,
+  cp.processUseDeviceAddr(stmtCtx, clauseOps, useDeviceTypes, useDeviceLocs,
                           useDeviceSyms);
-  cp.processUseDevicePtr(clauseOps, useDeviceTypes, useDeviceLocs,
+  cp.processUseDevicePtr(stmtCtx, clauseOps, useDeviceTypes, useDeviceLocs,
                          useDeviceSyms);
 
   // This function implements the deprecated functionality of use_device_ptr
diff --git a/flang/lib/Optimizer/Transforms/OMPMapInfoFinalization.cpp b/flang/lib/Optimizer/Transforms/OMPMapInfoFinalization.cpp
index ddaa3c5f404f0b..e3a8129a9fb73b 100644
--- a/flang/lib/Optimizer/Transforms/OMPMapInfoFinalization.cpp
+++ b/flang/lib/Optimizer/Transforms/OMPMapInfoFinalization.cpp
@@ -106,13 +106,12 @@ class OMPMapInfoFinalizationPass
     // TODO: map the addendum segment of the descriptor, similarly to the
     // above base address/data pointer member.
 
-    if (auto mapClauseOwner =
-            llvm::dyn_cast<mlir::omp::MapClauseOwningOpInterface>(target)) {
+    auto addOperands = [&](mlir::OperandRange &operandsArr,
+                           mlir::MutableOperandRange &mutableOpRange,
+                           auto directiveOp) {
       llvm::SmallVector<mlir::Value> newMapOps;
-      mlir::OperandRange mapVarsArr = mapClauseOwner.getMapVars();
-
-      for (size_t i = 0; i < mapVarsArr.size(); ++i) {
-        if (mapVarsArr[i] == op) {
+      for (size_t i = 0; i < operandsArr.size(); ++i) {
+        if (operandsArr[i] == op) {
           // Push new implicit maps generated for the descriptor.
           newMapOps.push_back(baseAddr);
 
@@ -120,13 +119,29 @@ class OMPMapInfoFinalizationPass
           // new additional map operand with an appropriate BlockArgument,
           // as the printing and later processing currently requires a 1:1
           // mapping of BlockArgs to MapInfoOp's at the same placement in
-          // each array (BlockArgs and MapVars).
-          if (auto targetOp = llvm::dyn_cast<mlir::omp::TargetOp>(target))
-            targetOp.getRegion().insertArgument(i, baseAddr.getType(), loc);
+          // each array (BlockArgs and MapOperands).
+          if (directiveOp) {
+            directiveOp.getRegion().insertArgument(i, baseAddr.getType(), loc);
+          }
         }
-        newMapOps.push_back(mapVarsArr[i]);
+        newMapOps.push_back(operandsArr[i]);
       }
-      mapClauseOwner.getMapVarsMutable().assign(newMapOps);
+      mutableOpRange.assign(newMapOps);
+    };
+    if (auto mapClauseOwner =
+            llvm::dyn_cast<mlir::omp::MapClauseOwningOpInterface>(target)) {
+      mlir::OperandRange mapOperandsArr = mapClauseOwner.getMapVars();
+      mlir::MutableOperandRange mapMutableOpRange =
+          mapClauseOwner.getMapVarsMutable();
+      mlir::omp::TargetOp targetOp =
+          llvm::dyn_cast<mlir::omp::TargetOp>(target);
+      addOperands(mapOperandsArr, mapMutableOpRange, targetOp);
+    }
+    if (auto targetDataOp = llvm::dyn_cast<mlir::omp::TargetDataOp>(target)) {
+      mlir::OperandRange useDevAddrArr = targetDataOp.getUseDeviceAddrVars();
+      mlir::MutableOperandRange useDevAddrMutableOpRange =
+          targetDataOp.getUseDeviceAddrVarsMutable();
+      addOperands(useDevAddrArr, useDevAddrMutableOpRange, targetDataOp);
     }
 
     mlir::Value newDescParentMapOp = builder.create<mlir::omp::MapInfoOp>(
diff --git a/flang/test/Lower/OpenMP/target.f90 b/flang/test/Lower/OpenMP/target.f90
index 9b92293cbf92f0..c37897df55ec25 100644
--- a/flang/test/Lower/OpenMP/target.f90
+++ b/flang/test/Lower/OpenMP/target.f90
@@ -527,21 +527,23 @@ end subroutine omp_target_device_ptr
  !===============================================================================
 
  !CHECK-LABEL: func.func @_QPomp_target_device_addr() {
- subroutine omp_target_device_addr
+subroutine omp_target_device_addr
    integer, pointer :: a
    !CHECK: %[[VAL_0:.*]] = fir.alloca !fir.box<!fir.ptr<i32>> {bindc_name = "a", uniq_name = "_QFomp_target_device_addrEa"}
    !CHECK: %[[VAL_0_DECL:.*]]:2 = hlfir.declare %0 {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFomp_target_device_addrEa"} : (!fir.ref<!fir.box<!fir.ptr<i32>>>) -> (!fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.ref<!fir.box<!fir.ptr<i32>>>)
    !CHECK: %[[MAP_MEMBERS:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref<!fir.box<!fir.ptr<i32>>>, i32) var_ptr_ptr({{.*}} : !fir.llvm_ptr<!fir.ref<i32>>) map_clauses(tofrom) capture(ByRef) -> !fir.llvm_ptr<!fir.ref<i32>> {name = ""}
    !CHECK: %[[MAP:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.box<!fir.ptr<i32>>) map_clauses(tofrom) capture(ByRef) members(%[[MAP_MEMBERS]] : [0] : !fir.llvm_ptr<!fir.ref<i32>>) -> !fir.ref<!fir.box<!fir.ptr<i32>>> {name = "a"}
-   !CHECK: omp.target_data map_entries(%[[MAP_MEMBERS]], %[[MAP]] : {{.*}}) use_device_addr(%[[VAL_0_DECL]]#1 : !fir.ref<!fir.box<!fir.ptr<i32>>>) {
+   !CHECK: %[[DEV_ADDR_MEMBERS:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref<!fir.box<!fir.ptr<i32>>>, i32) var_ptr_ptr({{.*}} : !fir.llvm_ptr<!fir.ref<i32>>) map_clauses(tofrom) capture(ByRef) -> !fir.llvm_ptr<!fir.ref<i32>> {name = ""}
+   !CHECK: %[[DEV_ADDR:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.box<!fir.ptr<i32>>) map_clauses(tofrom) capture(ByRef) members(%[[DEV_ADDR_MEMBERS]] : [0] : !fir.llvm_ptr<!fir.ref<i32>>) -> !fir.ref<!fir.box<!fir.ptr<i32>>> {name = "a"}
+   !CHECK: omp.target_data map_entries(%[[MAP_MEMBERS]], %[[MAP]] : {{.*}}) use_device_addr(%[[DEV_ADDR_MEMBERS]], %[[DEV_ADDR]] : {{.*}}) {
    !$omp target data map(tofrom: a) use_device_addr(a)
-   !CHECK: ^bb0(%[[VAL_1:.*]]: !fir.ref<!fir.box<!fir.ptr<i32>>>):
-   !CHECK: %[[VAL_1_DECL:.*]]:2 = hlfir.declare %[[VAL_1]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFomp_target_device_addrEa"} : (!fir.ref<!fir.box<!fir.ptr<i32>>>) -> (!fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.ref<!fir.box<!fir.ptr<i32>>>)
+   !CHECK: ^bb0(%[[ARG_0:.*]]: !fir.llvm_ptr<!fir.ref<i32>>, %[[ARG_1:.*]]: !fir.ref<!fir.box<!fir.ptr<i32>>>):
+   !CHECK: %[[VAL_1_DECL:.*]]:2 = hlfir.declare %[[ARG_1]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFomp_target_device_addrEa"} : (!fir.ref<!fir.box<!fir.ptr<i32>>>) -> (!fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.ref<!fir.box<!fir.ptr<i32>>>)
    !CHECK: %[[C10:.*]] = arith.constant 10 : i32
    !CHECK: %[[A_BOX:.*]] = fir.load %[[VAL_1_DECL]]#0 : !fir.ref<!fir.box<!fir.ptr<i32>>>
    !CHECK: %[[A_ADDR:.*]] = fir.box_addr %[[A_BOX]] : (!fir.box<!fir.ptr<i32>>) -> !fir.ptr<i32>
    !CHECK: hlfir.assign %[[C10]] to %[[A_ADDR]] : i32, !fir.ptr<i32>
-      a = 10
+   a = 10
    !CHECK: omp.terminator
    !$omp end target data
    !CHECK: }
diff --git a/flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90 b/flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90
index acb5f533b619e8..9c9b35f472b71f 100644
--- a/flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90
+++ b/flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90
@@ -2,72 +2,72 @@
 ! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s
 ! RUN: bbc -emit-hlfir -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s
 
-! This tests primary goal is to check the promotion of 
-! non-CPTR arguments from use_device_ptr to 
-! use_device_addr works, without breaking any 
-! functionality 
+! This tests primary goal is to check the promotion of
+! non-CPTR arguments from use_device_ptr to
+! use_device_addr works, without breaking any
+! functionality
 
 !CHECK: func.func @{{.*}}only_use_device_ptr()
-!CHECK: omp.target_data use_device_addr(%{{.*}}, %{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) use_device_ptr(%{{.*}} : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) {
-!CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>):
-subroutine only_use_device_ptr 
+!CHECK: omp.target_data use_device_addr(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) use_device_ptr(%{{.*}} : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) {
+!CHECK: ^bb0(%{{.*}}: !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>, %{{.*}}: !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, %{{.*}}: !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>):
+subroutine only_use_device_ptr
     use iso_c_binding
     integer, pointer, dimension(:) :: array
     real, pointer :: pa(:)
     type(c_ptr) :: cptr
 
-   !$omp target data use_device_ptr(pa, cptr, array)
-   !$omp end target data 
-end subroutine
+       !$omp target data use_device_ptr(pa, cptr, array)
+       !$omp end target data
+     end subroutine
 
 !CHECK: func.func @{{.*}}mix_use_device_ptr_and_addr()
-!CHECK: omp.target_data use_device_addr(%{{.*}}, %{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) use_device_ptr({{.*}} : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) {
-!CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, %{{.*}}: !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>):
-subroutine mix_use_device_ptr_and_addr 
+!CHECK: omp.target_data use_device_addr(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) use_device_ptr({{.*}} : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) {
+!CHECK: ^bb0(%{{.*}}: !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, %{{.*}}: !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>, %{{.*}}: !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>):
+subroutine mix_use_device_ptr_and_addr
     use iso_c_binding
     integer, pointer, dimension(:) :: array
     real, pointer :: pa(:)
     type(c_ptr) :: cptr
 
-   !$omp target data use_device_ptr(pa, cptr) use_device_addr(array)
-   !$omp end target data 
-end subroutine
+       !$omp target data use_device_ptr(pa, cptr) use_device_addr(array)
+       !$omp end target data
+     end subroutine
 
-!CHECK: func.func @{{.*}}only_use_device_addr()
-!CHECK: omp.target_data use_device_addr(%{{.*}}, %{{.*}}, %{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) {
-!CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, %{{.*}}: !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>):
-subroutine only_use_device_addr 
-    use iso_c_binding
-    integer, pointer, dimension(:) :: array
-    real, pointer :: pa(:)
-    type(c_ptr) :: cptr
+     !CHECK: func.func @{{.*}}only_use_device_addr()
+     !CHECK: omp.target_data use_device_addr(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) {
+     !CHECK: ^bb0(%{{.*}}: !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, %{{.*}}: !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, %{{.*}}: !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>):
+     subroutine only_use_device_addr
+        use iso_c_binding
+        integer, pointer, dimension(:) :: array
+        real, pointer :: pa(:)
+        type(c_ptr) :: cptr
 
-   !$omp target data use_device_addr(pa, cptr, array)
-   !$omp end target data 
-end subroutine
+       !$omp target data use_device_addr(pa, cptr, array)
+       !$omp end target data
+     end subroutine
 
-!CHECK: func.func @{{.*}}mix_use_device_ptr_and_addr_and_map()
-!CHECK: omp.target_data map_entries(%{{.*}}, %{{.*}} : !fir.ref<i32>, !fir.ref<i32>) use_device_addr(%{{.*}}, %{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) use_device_ptr(%{{.*}} : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) {
-!CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, %{{.*}}: !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>):
-subroutine mix_use_device_ptr_and_addr_and_map
-    use iso_c_binding
-    integer :: i, j
-    integer, pointer, dimension(:) :: array
-    real, pointer :: pa(:)
-    type(c_ptr) :: cptr
+     !CHECK: func.func @{{.*}}mix_use_device_ptr_and_addr_and_map()
+     !CHECK: omp.target_data map_entries(%{{.*}}, %{{.*}} : !fir.ref<i32>, !fir.ref<i32>) use_device_addr(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) use_device_ptr(%{{.*}} : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) {
+     !CHECK: ^bb0(%{{.*}}: !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, %{{.*}}: !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>, %{{.*}}: !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>):
+     subroutine mix_use_device_ptr_and_addr_and_map
+        use iso_c_binding
+        integer :: i, j
+        integer, pointer, dimension(:) :: array
+        real, pointer :: pa(:)
+        type(c_ptr) :: cptr
 
-   !$omp target data use_device_ptr(pa, cptr) use_device_addr(array) map(tofrom: i, j)
-   !$omp end target data 
-end subroutine
+       !$omp target data use_device_ptr(pa, cptr) use_device_addr(array) map(tofrom: i, j)
+       !$omp end target data
+     end subroutine
 
-!CHECK: func.func @{{.*}}only_use_map()
-!CHECK: omp.target_data map_entries(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) {
-subroutine only_use_map
-    use iso_c_binding
-    integer, pointer, dimension(:) :: array
-    real, pointer :: pa(:)
-    type(c_ptr) :: cptr
+     !CHECK: func.func @{{.*}}only_use_map()
+     !CHECK: omp.target_data map_entries(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) {
+     subroutine only_use_map
+        use iso_c_binding
+        integer, pointer, dimension(:) :: array
+        real, pointer :: pa(:)
+        type(c_ptr) :: cptr
 
-   !$omp target data map(pa, cptr, array)
-   !$omp end target data 
-end subroutine
+       !$omp target data map(pa, cptr, array)
+       !$omp end target data
+     end subroutine

>From d8d7b0b72e143fdb3a294b4cb2e928ac2fcb85f4 Mon Sep 17 00:00:00 2001
From: Akash Banerjee <Akash.Banerjee at amd.com>
Date: Tue, 6 Aug 2024 17:27:28 +0100
Subject: [PATCH 2/3] Addressed reviewer comments.

---
 flang/lib/Lower/OpenMP/ClauseProcessor.cpp    | 215 +++++---------
 flang/lib/Lower/OpenMP/ClauseProcessor.h      |  79 ++----
 flang/lib/Lower/OpenMP/OpenMP.cpp             | 268 ++++++++++--------
 .../use-device-ptr-to-use-device-addr.f90     |   6 +-
 4 files changed, 252 insertions(+), 316 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index c8cda7f9b519db..f10b0a4b482146 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -909,13 +909,68 @@ bool ClauseProcessor::processLink(
       });
 }
 
+void ClauseProcessor::processMapObjects(
+    lower::StatementContext &stmtCtx, mlir::Location clauseLocation,
+    const omp::ObjectList &objects,
+    llvm::omp::OpenMPOffloadMappingFlags mapTypeBits,
+    std::map<const semantics::Symbol *,
+             llvm::SmallVector<OmpMapMemberIndicesData>> &parentMemberIndices,
+    llvm::SmallVectorImpl<mlir::Value> &mapVars,
+    llvm::SmallVectorImpl<const semantics::Symbol *> *mapSyms,
+    llvm::SmallVectorImpl<mlir::Location> *mapSymLocs,
+    llvm::SmallVectorImpl<mlir::Type> *mapSymTypes) const {
+  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+  for (const omp::Object &object : objects) {
+    llvm::SmallVector<mlir::Value> bounds;
+    std::stringstream asFortran;
+
+    lower::AddrAndBoundsInfo info =
+        lower::gatherDataOperandAddrAndBounds<mlir::omp::MapBoundsOp,
+                                              mlir::omp::MapBoundsType>(
+            converter, firOpBuilder, semaCtx, stmtCtx, *object.sym(),
+            object.ref(), clauseLocation, asFortran, bounds,
+            treatIndexAsSection);
+
+    auto origSymbol = converter.getSymbolAddress(*object.sym());
+    mlir::Value symAddr = info.addr;
+    if (origSymbol && fir::isTypeWithDescriptor(origSymbol.getType()))
+      symAddr = origSymbol;
+
+    // Explicit map captures are captured ByRef by default,
+    // optimisation passes may alter this to ByCopy or other capture
+    // types to optimise
+    auto location = mlir::NameLoc::get(
+        mlir::StringAttr::get(firOpBuilder.getContext(), asFortran.str()),
+        symAddr.getLoc());
+    mlir::omp::MapInfoOp mapOp = createMapInfoOp(
+        firOpBuilder, location, symAddr,
+        /*varPtrPtr=*/mlir::Value{}, asFortran.str(), bounds,
+        /*members=*/{}, /*membersIndex=*/mlir::DenseIntElementsAttr{},
+        static_cast<
+            std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
+            mapTypeBits),
+        mlir::omp::VariableCaptureKind::ByRef, symAddr.getType());
+
+    if (object.sym()->owner().IsDerivedType()) {
+      addChildIndexAndMapToParent(object, parentMemberIndices, mapOp, semaCtx);
+    } else {
+      mapVars.push_back(mapOp);
+      if (mapSyms)
+        mapSyms->push_back(object.sym());
+      if (mapSymTypes)
+        mapSymTypes->push_back(symAddr.getType());
+      if (mapSymLocs)
+        mapSymLocs->push_back(symAddr.getLoc());
+    }
+  }
+}
+
 bool ClauseProcessor::processMap(
     mlir::Location currentLocation, lower::StatementContext &stmtCtx,
     mlir::omp::MapClauseOps &result,
     llvm::SmallVectorImpl<const semantics::Symbol *> *mapSyms,
     llvm::SmallVectorImpl<mlir::Location> *mapSymLocs,
     llvm::SmallVectorImpl<mlir::Type> *mapSymTypes) const {
-  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   // We always require tracking of symbols, even if the caller does not,
   // so we create an optionally used local set of symbols when the mapSyms
   // argument is not present.
@@ -970,50 +1025,10 @@ bool ClauseProcessor::processMap(
           mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO |
                          llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
         }
-
-        for (const omp::Object &object : std::get<omp::ObjectList>(clause.t)) {
-          llvm::SmallVector<mlir::Value> bounds;
-          std::stringstream asFortran;
-
-          lower::AddrAndBoundsInfo info =
-              lower::gatherDataOperandAddrAndBounds<mlir::omp::MapBoundsOp,
-                                                    mlir::omp::MapBoundsType>(
-                  converter, firOpBuilder, semaCtx, stmtCtx, *object.sym(),
-                  object.ref(), clauseLocation, asFortran, bounds,
-                  treatIndexAsSection);
-
-          auto origSymbol = converter.getSymbolAddress(*object.sym());
-          mlir::Value symAddr = info.addr;
-          if (origSymbol && fir::isTypeWithDescriptor(origSymbol.getType()))
-            symAddr = origSymbol;
-
-          // Explicit map captures are captured ByRef by default,
-          // optimisation passes may alter this to ByCopy or other capture
-          // types to optimise
-          auto location = mlir::NameLoc::get(
-              mlir::StringAttr::get(firOpBuilder.getContext(), asFortran.str()),
-              symAddr.getLoc());
-          mlir::omp::MapInfoOp mapOp = createMapInfoOp(
-              firOpBuilder, location, symAddr,
-              /*varPtrPtr=*/mlir::Value{}, asFortran.str(), bounds,
-              /*members=*/{}, /*membersIndex=*/mlir::DenseIntElementsAttr{},
-              static_cast<
-                  std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
-                  mapTypeBits),
-              mlir::omp::VariableCaptureKind::ByRef, symAddr.getType());
-
-          if (object.sym()->owner().IsDerivedType()) {
-            addChildIndexAndMapToParent(object, parentMemberIndices, mapOp,
-                                        semaCtx);
-          } else {
-            result.mapVars.push_back(mapOp);
-            ptrMapSyms->push_back(object.sym());
-            if (mapSymTypes)
-              mapSymTypes->push_back(symAddr.getType());
-            if (mapSymLocs)
-              mapSymLocs->push_back(symAddr.getLoc());
-          }
-        }
+        processMapObjects(stmtCtx, clauseLocation,
+                          std::get<omp::ObjectList>(clause.t), mapTypeBits,
+                          parentMemberIndices, result.mapVars, ptrMapSyms,
+                          mapSymLocs, mapSymTypes);
       });
 
   insertChildMapInfoIntoParent(converter, parentMemberIndices, result.mapVars,
@@ -1076,62 +1091,23 @@ bool ClauseProcessor::processEnter(
 }
 
 bool ClauseProcessor::processUseDeviceAddr(
-    Fortran::lower::StatementContext &stmtCtx,
-    mlir::omp::UseDeviceAddrClauseOps &result,
+    lower::StatementContext &stmtCtx, mlir::omp::UseDeviceAddrClauseOps &result,
     llvm::SmallVectorImpl<mlir::Type> &useDeviceTypes,
     llvm::SmallVectorImpl<mlir::Location> &useDeviceLocs,
-    llvm::SmallVectorImpl<const Fortran::semantics::Symbol *> &useDeviceSyms)
-    const {
-  std::map<const Fortran::semantics::Symbol *,
+    llvm::SmallVectorImpl<const semantics::Symbol *> &useDeviceSyms) const {
+  std::map<const semantics::Symbol *,
            llvm::SmallVector<OmpMapMemberIndicesData>>
       parentMemberIndices;
   bool clauseFound = findRepeatableClause<omp::clause::UseDeviceAddr>(
-      [&](const omp::clause::UseDeviceAddr &clause,
-          const Fortran::parser::CharBlock &) {
-        const Fortran::parser::CharBlock source;
+      [&](const omp::clause::UseDeviceAddr &clause, const parser::CharBlock &) {
+        const parser::CharBlock source;
         mlir::Location location = converter.genLocation(source);
-        fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
         llvm::omp::OpenMPOffloadMappingFlags mapTypeBits =
             llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO |
             llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
-        for (const omp::Object &object : clause.v) {
-          llvm::SmallVector<mlir::Value> bounds;
-          std::stringstream asFortran;
-
-          Fortran::lower::AddrAndBoundsInfo info =
-              Fortran::lower::gatherDataOperandAddrAndBounds<
-                  mlir::omp::MapBoundsOp, mlir::omp::MapBoundsType>(
-                  converter, firOpBuilder, semaCtx, stmtCtx, *object.sym(),
-                  object.ref(), location, asFortran, bounds,
-                  treatIndexAsSection);
-
-          auto origSymbol = converter.getSymbolAddress(*object.sym());
-          mlir::Value symAddr = info.addr;
-          if (origSymbol && fir::isTypeWithDescriptor(origSymbol.getType()))
-            symAddr = origSymbol;
-
-          // Explicit map captures are captured ByRef by default,
-          // optimisation passes may alter this to ByCopy or other capture
-          // types to optimise
-          mlir::omp::MapInfoOp mapOp = createMapInfoOp(
-              firOpBuilder, location, symAddr,
-              /*varPtrPtr=*/mlir::Value{}, asFortran.str(), bounds,
-              /*members=*/{}, /*membersIndex=*/mlir::DenseIntElementsAttr{},
-              static_cast<
-                  std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
-                  mapTypeBits),
-              mlir::omp::VariableCaptureKind::ByRef, symAddr.getType());
-
-          if (object.sym()->owner().IsDerivedType()) {
-            addChildIndexAndMapToParent(object, parentMemberIndices, mapOp,
-                                        semaCtx);
-          } else {
-            useDeviceSyms.push_back(object.sym());
-            useDeviceTypes.push_back(symAddr.getType());
-            useDeviceLocs.push_back(symAddr.getLoc());
-            result.useDeviceAddrVars.push_back(mapOp);
-          }
-        }
+        processMapObjects(stmtCtx, location, clause.v, mapTypeBits,
+                          parentMemberIndices, result.useDeviceAddrVars,
+                          &useDeviceSyms, &useDeviceLocs, &useDeviceTypes);
       });
 
   insertChildMapInfoIntoParent(converter, parentMemberIndices,
@@ -1141,62 +1117,23 @@ bool ClauseProcessor::processUseDeviceAddr(
 }
 
 bool ClauseProcessor::processUseDevicePtr(
-    Fortran::lower::StatementContext &stmtCtx,
-    mlir::omp::UseDevicePtrClauseOps &result,
+    lower::StatementContext &stmtCtx, mlir::omp::UseDevicePtrClauseOps &result,
     llvm::SmallVectorImpl<mlir::Type> &useDeviceTypes,
     llvm::SmallVectorImpl<mlir::Location> &useDeviceLocs,
-    llvm::SmallVectorImpl<const Fortran::semantics::Symbol *> &useDeviceSyms)
-    const {
-  std::map<const Fortran::semantics::Symbol *,
+    llvm::SmallVectorImpl<const semantics::Symbol *> &useDeviceSyms) const {
+  std::map<const semantics::Symbol *,
            llvm::SmallVector<OmpMapMemberIndicesData>>
       parentMemberIndices;
   bool clauseFound = findRepeatableClause<omp::clause::UseDevicePtr>(
-      [&](const omp::clause::UseDevicePtr &clause,
-          const Fortran::parser::CharBlock &) {
-        const Fortran::parser::CharBlock source;
+      [&](const omp::clause::UseDevicePtr &clause, const parser::CharBlock &) {
+        const parser::CharBlock source;
         mlir::Location location = converter.genLocation(source);
-        fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
         llvm::omp::OpenMPOffloadMappingFlags mapTypeBits =
             llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO |
             llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
-        for (const omp::Object &object : clause.v) {
-          llvm::SmallVector<mlir::Value> bounds;
-          std::stringstream asFortran;
-
-          Fortran::lower::AddrAndBoundsInfo info =
-              Fortran::lower::gatherDataOperandAddrAndBounds<
-                  mlir::omp::MapBoundsOp, mlir::omp::MapBoundsType>(
-                  converter, firOpBuilder, semaCtx, stmtCtx, *object.sym(),
-                  object.ref(), location, asFortran, bounds,
-                  treatIndexAsSection);
-
-          auto origSymbol = converter.getSymbolAddress(*object.sym());
-          mlir::Value symAddr = info.addr;
-          if (origSymbol && fir::isTypeWithDescriptor(origSymbol.getType()))
-            symAddr = origSymbol;
-
-          // Explicit map captures are captured ByRef by default,
-          // optimisation passes may alter this to ByCopy or other capture
-          // types to optimise
-          mlir::omp::MapInfoOp mapOp = createMapInfoOp(
-              firOpBuilder, location, symAddr,
-              /*varPtrPtr=*/mlir::Value{}, asFortran.str(), bounds,
-              /*members=*/{}, /*membersIndex=*/mlir::DenseIntElementsAttr{},
-              static_cast<
-                  std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
-                  mapTypeBits),
-              mlir::omp::VariableCaptureKind::ByRef, symAddr.getType());
-
-          if (object.sym()->owner().IsDerivedType()) {
-            addChildIndexAndMapToParent(object, parentMemberIndices, mapOp,
-                                        semaCtx);
-          } else {
-            useDeviceSyms.push_back(object.sym());
-            useDeviceTypes.push_back(symAddr.getType());
-            useDeviceLocs.push_back(symAddr.getLoc());
-            result.useDevicePtrVars.push_back(mapOp);
-          }
-        }
+        processMapObjects(stmtCtx, location, clause.v, mapTypeBits,
+                          parentMemberIndices, result.useDevicePtrVars,
+                          &useDeviceSyms, &useDeviceLocs, &useDeviceTypes);
       });
 
   insertChildMapInfoIntoParent(converter, parentMemberIndices,
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h
index d33873516e9968..a35b77626909e3 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.h
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h
@@ -128,20 +128,18 @@ class ClauseProcessor {
       llvm::SmallVectorImpl<const semantics::Symbol *> *reductionSyms =
           nullptr) const;
   bool processTo(llvm::SmallVectorImpl<DeclareTargetCapturePair> &result) const;
-  bool
-  processUseDeviceAddr(Fortran::lower::StatementContext &stmtCtx,
-                       mlir::omp::UseDeviceAddrClauseOps &result,
-                       llvm::SmallVectorImpl<mlir::Type> &useDeviceTypes,
-                       llvm::SmallVectorImpl<mlir::Location> &useDeviceLocs,
-                       llvm::SmallVectorImpl<const Fortran::semantics::Symbol *>
-                           &useDeviceSyms) const;
-  bool
-  processUseDevicePtr(Fortran::lower::StatementContext &stmtCtx,
-                      mlir::omp::UseDevicePtrClauseOps &result,
-                      llvm::SmallVectorImpl<mlir::Type> &useDeviceTypes,
-                      llvm::SmallVectorImpl<mlir::Location> &useDeviceLocs,
-                      llvm::SmallVectorImpl<const Fortran::semantics::Symbol *>
-                          &useDeviceSyms) const;
+  bool processUseDeviceAddr(
+      lower::StatementContext &stmtCtx,
+      mlir::omp::UseDeviceAddrClauseOps &result,
+      llvm::SmallVectorImpl<mlir::Type> &useDeviceTypes,
+      llvm::SmallVectorImpl<mlir::Location> &useDeviceLocs,
+      llvm::SmallVectorImpl<const semantics::Symbol *> &useDeviceSyms) const;
+  bool processUseDevicePtr(
+      lower::StatementContext &stmtCtx,
+      mlir::omp::UseDevicePtrClauseOps &result,
+      llvm::SmallVectorImpl<mlir::Type> &useDeviceTypes,
+      llvm::SmallVectorImpl<mlir::Location> &useDeviceLocs,
+      llvm::SmallVectorImpl<const semantics::Symbol *> &useDeviceSyms) const;
 
   template <typename T>
   bool processMotionClauses(lower::StatementContext &stmtCtx,
@@ -177,6 +175,17 @@ class ClauseProcessor {
   template <typename T>
   bool markClauseOccurrence(mlir::UnitAttr &result) const;
 
+  void processMapObjects(
+      lower::StatementContext &stmtCtx, mlir::Location clauseLocation,
+      const omp::ObjectList &objects,
+      llvm::omp::OpenMPOffloadMappingFlags mapTypeBits,
+      std::map<const semantics::Symbol *,
+               llvm::SmallVector<OmpMapMemberIndicesData>> &parentMemberIndices,
+      llvm::SmallVectorImpl<mlir::Value> &mapVars,
+      llvm::SmallVectorImpl<const semantics::Symbol *> *mapSyms,
+      llvm::SmallVectorImpl<mlir::Location> *mapSymLocs = nullptr,
+      llvm::SmallVectorImpl<mlir::Type> *mapSymTypes = nullptr) const;
+
   lower::AbstractConverter &converter;
   semantics::SemanticsContext &semaCtx;
   List<Clause> clauses;
@@ -193,7 +202,6 @@ bool ClauseProcessor::processMotionClauses(lower::StatementContext &stmtCtx,
   bool clauseFound = findRepeatableClause<T>(
       [&](const T &clause, const parser::CharBlock &source) {
         mlir::Location clauseLocation = converter.genLocation(source);
-        fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
 
         static_assert(std::is_same_v<T, omp::clause::To> ||
                       std::is_same_v<T, omp::clause::From>);
@@ -204,43 +212,10 @@ bool ClauseProcessor::processMotionClauses(lower::StatementContext &stmtCtx,
                 ? llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO
                 : llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
 
-        auto &objects = std::get<ObjectList>(clause.t);
-        for (const omp::Object &object : objects) {
-          llvm::SmallVector<mlir::Value> bounds;
-          std::stringstream asFortran;
-
-          lower::AddrAndBoundsInfo info =
-              lower::gatherDataOperandAddrAndBounds<mlir::omp::MapBoundsOp,
-                                                    mlir::omp::MapBoundsType>(
-                  converter, firOpBuilder, semaCtx, stmtCtx, *object.sym(),
-                  object.ref(), clauseLocation, asFortran, bounds,
-                  treatIndexAsSection);
-
-          auto origSymbol = converter.getSymbolAddress(*object.sym());
-          mlir::Value symAddr = info.addr;
-          if (origSymbol && fir::isTypeWithDescriptor(origSymbol.getType()))
-            symAddr = origSymbol;
-
-          // Explicit map captures are captured ByRef by default,
-          // optimisation passes may alter this to ByCopy or other capture
-          // types to optimise
-          mlir::omp::MapInfoOp mapOp = createMapInfoOp(
-              firOpBuilder, clauseLocation, symAddr,
-              /*varPtrPtr=*/mlir::Value{}, asFortran.str(), bounds,
-              /*members=*/{}, /*membersIndex=*/mlir::DenseIntElementsAttr{},
-              static_cast<
-                  std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
-                  mapTypeBits),
-              mlir::omp::VariableCaptureKind::ByRef, symAddr.getType());
-
-          if (object.sym()->owner().IsDerivedType()) {
-            addChildIndexAndMapToParent(object, parentMemberIndices, mapOp,
-                                        semaCtx);
-          } else {
-            result.mapVars.push_back(mapOp);
-            mapSymbols.push_back(object.sym());
-          }
-        }
+        processMapObjects(stmtCtx, clauseLocation,
+                          std::get<ObjectList>(clause.t), mapTypeBits,
+                          parentMemberIndices, result.mapVars, &mapSymbols,
+                          nullptr, nullptr);
       });
 
   insertChildMapInfoIntoParent(converter, parentMemberIndices, result.mapVars,
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 34ca1de9bfea31..3f77d9a776ca24 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -831,34 +831,40 @@ static void genIntermediateCommonBlockAccessors(
 
 // This functions creates a block for the body of the targetOp's region. It adds
 // all the symbols present in mapSymbols as block arguments to this block.
-static void genBodyOfTargetOp(
+static void genBodyOfTargetAndDataOp(
     lower::AbstractConverter &converter, lower::SymMap &symTable,
     semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
-    mlir::omp::TargetOp &targetOp,
-    llvm::ArrayRef<const semantics::Symbol *> mapSyms,
+    mlir::Operation &op, llvm::ArrayRef<const semantics::Symbol *> mapSyms,
     llvm::ArrayRef<mlir::Location> mapSymLocs,
-    llvm::ArrayRef<mlir::Type> mapSymTypes, DataSharingProcessor &dsp,
+    llvm::ArrayRef<mlir::Type> mapSymTypes,
     const mlir::Location &currentLocation, const ConstructQueue &queue,
-    ConstructQueue::const_iterator item) {
+    ConstructQueue::const_iterator item, DataSharingProcessor *dsp = nullptr,
+    bool isTargetOp = true) {
   assert(mapSymTypes.size() == mapSymLocs.size());
 
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
-  mlir::Region &region = targetOp.getRegion();
+  mlir::Region &region = op.getRegion(0);
 
-  llvm::SmallVector<mlir::Type> allRegionArgTypes;
-  mergePrivateVarsInfo(targetOp, mapSymTypes,
-                       llvm::function_ref<mlir::Type(mlir::Value)>{
-                           [](mlir::Value v) { return v.getType(); }},
-                       allRegionArgTypes);
+  mlir::Block *regionBlock = nullptr;
+  if (isTargetOp) {
+    llvm::SmallVector<mlir::Type> allRegionArgTypes;
+    llvm::SmallVector<mlir::Location> allRegionArgLocs;
+    mergePrivateVarsInfo(mlir::cast<mlir::omp::TargetOp>(op), mapSymTypes,
+                         llvm::function_ref<mlir::Type(mlir::Value)>{
+                             [](mlir::Value v) { return v.getType(); }},
+                         allRegionArgTypes);
 
-  llvm::SmallVector<mlir::Location> allRegionArgLocs;
-  mergePrivateVarsInfo(targetOp, mapSymLocs,
-                       llvm::function_ref<mlir::Location(mlir::Value)>{
-                           [](mlir::Value v) { return v.getLoc(); }},
-                       allRegionArgLocs);
+    mergePrivateVarsInfo(mlir::cast<mlir::omp::TargetOp>(op), mapSymLocs,
+                         llvm::function_ref<mlir::Location(mlir::Value)>{
+                             [](mlir::Value v) { return v.getLoc(); }},
+                         allRegionArgLocs);
 
-  auto *regionBlock = firOpBuilder.createBlock(&region, {}, allRegionArgTypes,
-                                               allRegionArgLocs);
+    regionBlock = firOpBuilder.createBlock(&region, {}, allRegionArgTypes,
+                                           allRegionArgLocs);
+  } else {
+    regionBlock =
+        firOpBuilder.createBlock(&region, {}, mapSymTypes, mapSymLocs);
+  }
 
   // Clones the `bounds` placing them inside the target region and returns them.
   auto cloneBound = [&](mlir::Value bound) {
@@ -887,105 +893,121 @@ static void genBodyOfTargetOp(
     if (sym->owner().IsDerivedType())
       continue;
     fir::ExtendedValue extVal = converter.getSymbolExtendedValue(*sym);
-    extVal.match(
-        [&](const fir::BoxValue &v) {
-          converter.bindSymbol(*sym,
-                               fir::BoxValue(arg, cloneBounds(v.getLBounds()),
-                                             v.getExplicitParameters(),
-                                             v.getExplicitExtents()));
-        },
-        [&](const fir::MutableBoxValue &v) {
-          converter.bindSymbol(
-              *sym, fir::MutableBoxValue(arg, cloneBounds(v.getLBounds()),
-                                         v.getMutableProperties()));
-        },
-        [&](const fir::ArrayBoxValue &v) {
-          converter.bindSymbol(
-              *sym, fir::ArrayBoxValue(arg, cloneBounds(v.getExtents()),
-                                       cloneBounds(v.getLBounds()),
-                                       v.getSourceBox()));
-        },
-        [&](const fir::CharArrayBoxValue &v) {
-          converter.bindSymbol(
-              *sym, fir::CharArrayBoxValue(arg, cloneBound(v.getLen()),
-                                           cloneBounds(v.getExtents()),
-                                           cloneBounds(v.getLBounds())));
-        },
-        [&](const fir::CharBoxValue &v) {
-          converter.bindSymbol(*sym,
-                               fir::CharBoxValue(arg, cloneBound(v.getLen())));
-        },
-        [&](const fir::UnboxedValue &v) { converter.bindSymbol(*sym, arg); },
-        [&](const auto &) {
-          TODO(converter.getCurrentLocation(),
-               "target map clause operand unsupported type");
-        });
-  }
-
-  for (auto [argIndex, argSymbol] :
-       llvm::enumerate(dsp.getAllSymbolsToPrivatize())) {
-    argIndex = mapSyms.size() + argIndex;
-
-    const mlir::BlockArgument &arg = region.getArgument(argIndex);
-    converter.bindSymbol(*argSymbol,
-                         hlfir::translateToExtendedValue(
-                             currentLocation, firOpBuilder, hlfir::Entity{arg},
-                             /*contiguousHint=*/
-                             evaluate::IsSimplyContiguous(
-                                 *argSymbol, converter.getFoldingContext()))
-                             .first);
+    auto refType = mlir::dyn_cast<fir::ReferenceType>(arg.getType());
+    if (!isTargetOp && refType &&
+        fir::isa_builtin_cptr_type(refType.getElementType())) {
+      converter.bindSymbol(*argSymbol, arg);
+    } else {
+      extVal.match(
+          [&](const fir::BoxValue &v) {
+            converter.bindSymbol(*sym,
+                                 fir::BoxValue(arg, cloneBounds(v.getLBounds()),
+                                               v.getExplicitParameters(),
+                                               v.getExplicitExtents()));
+          },
+          [&](const fir::MutableBoxValue &v) {
+            converter.bindSymbol(
+                *sym, fir::MutableBoxValue(arg, cloneBounds(v.getLBounds()),
+                                           v.getMutableProperties()));
+          },
+          [&](const fir::ArrayBoxValue &v) {
+            converter.bindSymbol(
+                *sym, fir::ArrayBoxValue(arg, cloneBounds(v.getExtents()),
+                                         cloneBounds(v.getLBounds()),
+                                         v.getSourceBox()));
+          },
+          [&](const fir::CharArrayBoxValue &v) {
+            converter.bindSymbol(
+                *sym, fir::CharArrayBoxValue(arg, cloneBound(v.getLen()),
+                                             cloneBounds(v.getExtents()),
+                                             cloneBounds(v.getLBounds())));
+          },
+          [&](const fir::CharBoxValue &v) {
+            converter.bindSymbol(
+                *sym, fir::CharBoxValue(arg, cloneBound(v.getLen())));
+          },
+          [&](const fir::UnboxedValue &v) { converter.bindSymbol(*sym, arg); },
+          [&](const auto &) {
+            TODO(converter.getCurrentLocation(),
+                 "target map clause operand unsupported type");
+          });
+    }
   }
 
-  // Check if cloning the bounds introduced any dependency on the outer region.
-  // If so, then either clone them as well if they are MemoryEffectFree, or else
-  // copy them to a new temporary and add them to the map and block_argument
-  // lists and replace their uses with the new temporary.
-  llvm::SetVector<mlir::Value> valuesDefinedAbove;
-  mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove);
-  while (!valuesDefinedAbove.empty()) {
-    for (mlir::Value val : valuesDefinedAbove) {
-      mlir::Operation *valOp = val.getDefiningOp();
-      if (mlir::isMemoryEffectFree(valOp)) {
-        mlir::Operation *clonedOp = valOp->clone();
-        regionBlock->push_front(clonedOp);
-        val.replaceUsesWithIf(
-            clonedOp->getResult(0), [regionBlock](mlir::OpOperand &use) {
-              return use.getOwner()->getBlock() == regionBlock;
-            });
-      } else {
-        auto savedIP = firOpBuilder.getInsertionPoint();
-        firOpBuilder.setInsertionPointAfter(valOp);
-        auto copyVal =
-            firOpBuilder.createTemporary(val.getLoc(), val.getType());
-        firOpBuilder.createStoreWithConvert(copyVal.getLoc(), val, copyVal);
+  if (isTargetOp) {
+    for (auto [argIndex, argSymbol] :
+         llvm::enumerate(dsp->getAllSymbolsToPrivatize())) {
+      argIndex = mapSyms.size() + argIndex;
+
+      const mlir::BlockArgument &arg = region.getArgument(argIndex);
+      converter.bindSymbol(
+          *argSymbol, hlfir::translateToExtendedValue(
+                          currentLocation, firOpBuilder, hlfir::Entity{arg},
+                          /*contiguousHint=*/
+                          evaluate::IsSimplyContiguous(
+                              *argSymbol, converter.getFoldingContext()))
+                          .first);
+    }
 
-        llvm::SmallVector<mlir::Value> bounds;
-        std::stringstream name;
-        firOpBuilder.setInsertionPoint(targetOp);
-        mlir::Value mapOp = createMapInfoOp(
-            firOpBuilder, copyVal.getLoc(), copyVal,
-            /*varPtrPtr=*/mlir::Value{}, name.str(), bounds,
-            /*members=*/llvm::SmallVector<mlir::Value>{},
-            /*membersIndex=*/mlir::DenseIntElementsAttr{},
-            static_cast<
-                std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
-                llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT),
-            mlir::omp::VariableCaptureKind::ByCopy, copyVal.getType());
-        targetOp.getMapVarsMutable().append(mapOp);
-        mlir::Value clonedValArg =
-            region.addArgument(copyVal.getType(), copyVal.getLoc());
-        firOpBuilder.setInsertionPointToStart(regionBlock);
-        auto loadOp = firOpBuilder.create<fir::LoadOp>(clonedValArg.getLoc(),
-                                                       clonedValArg);
-        val.replaceUsesWithIf(
-            loadOp->getResult(0), [regionBlock](mlir::OpOperand &use) {
-              return use.getOwner()->getBlock() == regionBlock;
-            });
-        firOpBuilder.setInsertionPoint(regionBlock, savedIP);
+    // Check if cloning the bounds introduced any dependency on the outer
+    // region. If so, then either clone them as well if they are
+    // MemoryEffectFree, or else copy them to a new temporary and add them to
+    // the map and block_argument lists and replace their uses with the new
+    // temporary.
+    llvm::SetVector<mlir::Value> valuesDefinedAbove;
+    mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove);
+    while (!valuesDefinedAbove.empty()) {
+      for (mlir::Value val : valuesDefinedAbove) {
+        mlir::Operation *valOp = val.getDefiningOp();
+        if (mlir::isMemoryEffectFree(valOp)) {
+          mlir::Operation *clonedOp = valOp->clone();
+          regionBlock->push_front(clonedOp);
+          val.replaceUsesWithIf(
+              clonedOp->getResult(0), [regionBlock](mlir::OpOperand &use) {
+                return use.getOwner()->getBlock() == regionBlock;
+              });
+        } else {
+          auto savedIP = firOpBuilder.getInsertionPoint();
+          firOpBuilder.setInsertionPointAfter(valOp);
+          auto copyVal =
+              firOpBuilder.createTemporary(val.getLoc(), val.getType());
+          firOpBuilder.createStoreWithConvert(copyVal.getLoc(), val, copyVal);
+
+          llvm::SmallVector<mlir::Value> bounds;
+          std::stringstream name;
+          firOpBuilder.setInsertionPoint(&op);
+          mlir::Value mapOp = createMapInfoOp(
+              firOpBuilder, copyVal.getLoc(), copyVal,
+              /*varPtrPtr=*/mlir::Value{}, name.str(), bounds,
+              /*members=*/llvm::SmallVector<mlir::Value>{},
+              /*membersIndex=*/mlir::DenseIntElementsAttr{},
+              static_cast<
+                  std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
+                  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT),
+              mlir::omp::VariableCaptureKind::ByCopy, copyVal.getType());
+
+          if (isTargetOp)
+            mlir::cast<mlir::omp::TargetOp>(op).getMapVarsMutable().append(
+                mapOp);
+          else
+            mlir::cast<mlir::omp::TargetDataOp>(op).getMapVarsMutable().append(
+                mapOp);
+
+          mlir::Value clonedValArg =
+              region.addArgument(copyVal.getType(), copyVal.getLoc());
+          firOpBuilder.setInsertionPointToStart(regionBlock);
+          auto loadOp = firOpBuilder.create<fir::LoadOp>(clonedValArg.getLoc(),
+                                                         clonedValArg);
+          val.replaceUsesWithIf(
+              loadOp->getResult(0), [regionBlock](mlir::OpOperand &use) {
+                return use.getOwner()->getBlock() == regionBlock;
+              });
+          firOpBuilder.setInsertionPoint(regionBlock, savedIP);
+        }
       }
+      valuesDefinedAbove.clear();
+      mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove);
     }
-    valuesDefinedAbove.clear();
-    mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove);
   }
 
   // Insert dummy instruction to remember the insertion position. The
@@ -993,7 +1015,7 @@ static void genBodyOfTargetOp(
   // In the HLFIR flow there are hlfir.declares inserted above while
   // setting block arguments.
   mlir::Value undefMarker = firOpBuilder.create<fir::UndefOp>(
-      targetOp.getOperation()->getLoc(), firOpBuilder.getIndexType());
+      op.getLoc(), firOpBuilder.getIndexType());
 
   // Create blocks for unstructured regions. This has to be done since
   // blocks are initially allocated with the function as the parent region.
@@ -1014,8 +1036,9 @@ static void genBodyOfTargetOp(
   // within the region, binding them to the member symbol for the scope of the
   // region so that subsequent code generation within the region will utilise
   // our new member accesses we have created.
-  genIntermediateCommonBlockAccessors(converter, currentLocation, region,
-                                      mapSyms);
+  if (isTargetOp)
+    genIntermediateCommonBlockAccessors(converter, currentLocation, region,
+                                        mapSyms);
 
   if (ConstructQueue::const_iterator next = std::next(item);
       next != queue.end()) {
@@ -1025,7 +1048,8 @@ static void genBodyOfTargetOp(
     genNestedEvaluations(converter, eval);
   }
 
-  dsp.processStep2(targetOp, /*isLoop=*/false);
+  if (isTargetOp)
+    dsp->processStep2(mlir::cast<mlir::omp::TargetOp>(op), /*isLoop=*/false);
 }
 
 template <typename OpTy, typename... Args>
@@ -1828,8 +1852,9 @@ genTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
   lower::pft::visitAllSymbols(eval, captureImplicitMap);
 
   auto targetOp = firOpBuilder.create<mlir::omp::TargetOp>(loc, clauseOps);
-  genBodyOfTargetOp(converter, symTable, semaCtx, eval, targetOp, mapSyms,
-                    mapLocs, mapTypes, dsp, loc, queue, item);
+  genBodyOfTargetAndDataOp(converter, symTable, semaCtx, eval,
+                           *targetOp.getOperation(), mapSyms, mapLocs, mapTypes,
+                           loc, queue, item, &dsp);
   return targetOp;
 }
 
@@ -1850,9 +1875,10 @@ genTargetDataOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
   auto targetDataOp =
       converter.getFirOpBuilder().create<mlir::omp::TargetDataOp>(loc,
                                                                   clauseOps);
-  genBodyOfTargetDataOp(converter, symTable, semaCtx, eval, targetDataOp,
-                        useDeviceTypes, useDeviceLocs, useDeviceSyms, loc,
-                        queue, item);
+  genBodyOfTargetAndDataOp(converter, symTable, semaCtx, eval,
+                           *targetDataOp.getOperation(), useDeviceSyms,
+                           useDeviceLocs, useDeviceTypes, loc, queue, item,
+                           /*dsp=*/nullptr, /*isTargetOp=*/false);
   return targetDataOp;
 }
 
diff --git a/flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90 b/flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90
index 9c9b35f472b71f..085f5419fa7f88 100644
--- a/flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90
+++ b/flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90
@@ -2,10 +2,8 @@
 ! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s
 ! RUN: bbc -emit-hlfir -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s
 
-! This tests primary goal is to check the promotion of
-! non-CPTR arguments from use_device_ptr to
-! use_device_addr works, without breaking any
-! functionality
+! This tests primary goal is to check the promotion of non-CPTR arguments from
+! use_device_ptr to use_device_addr works, without breaking any functionality.
 
 !CHECK: func.func @{{.*}}only_use_device_ptr()
 !CHECK: omp.target_data use_device_addr(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) use_device_ptr(%{{.*}} : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) {

>From 7e4b6d702f1db7e001519c990893d59a191e74ec Mon Sep 17 00:00:00 2001
From: Akash Banerjee <Akash.Banerjee at amd.com>
Date: Tue, 13 Aug 2024 13:55:16 +0100
Subject: [PATCH 3/3] Addressed reviewer comments.

---
 flang/lib/Lower/OpenMP/ClauseProcessor.cpp |   8 +-
 flang/lib/Lower/OpenMP/ClauseProcessor.h   |   3 +-
 flang/lib/Lower/OpenMP/OpenMP.cpp          | 323 ++++++++-------------
 3 files changed, 130 insertions(+), 204 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index f10b0a4b482146..b5257f390380e9 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -1099,8 +1099,8 @@ bool ClauseProcessor::processUseDeviceAddr(
            llvm::SmallVector<OmpMapMemberIndicesData>>
       parentMemberIndices;
   bool clauseFound = findRepeatableClause<omp::clause::UseDeviceAddr>(
-      [&](const omp::clause::UseDeviceAddr &clause, const parser::CharBlock &) {
-        const parser::CharBlock source;
+      [&](const omp::clause::UseDeviceAddr &clause,
+          const parser::CharBlock &source) {
         mlir::Location location = converter.genLocation(source);
         llvm::omp::OpenMPOffloadMappingFlags mapTypeBits =
             llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO |
@@ -1125,8 +1125,8 @@ bool ClauseProcessor::processUseDevicePtr(
            llvm::SmallVector<OmpMapMemberIndicesData>>
       parentMemberIndices;
   bool clauseFound = findRepeatableClause<omp::clause::UseDevicePtr>(
-      [&](const omp::clause::UseDevicePtr &clause, const parser::CharBlock &) {
-        const parser::CharBlock source;
+      [&](const omp::clause::UseDevicePtr &clause,
+          const parser::CharBlock &source) {
         mlir::Location location = converter.genLocation(source);
         llvm::omp::OpenMPOffloadMappingFlags mapTypeBits =
             llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO |
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h
index a35b77626909e3..b7a80c72c4ec5e 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.h
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h
@@ -214,8 +214,7 @@ bool ClauseProcessor::processMotionClauses(lower::StatementContext &stmtCtx,
 
         processMapObjects(stmtCtx, clauseLocation,
                           std::get<ObjectList>(clause.t), mapTypeBits,
-                          parentMemberIndices, result.mapVars, &mapSymbols,
-                          nullptr, nullptr);
+                          parentMemberIndices, result.mapVars, &mapSymbols);
       });
 
   insertChildMapInfoIntoParent(converter, parentMemberIndices, result.mapVars,
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 3f77d9a776ca24..2b4a2d4c7477ff 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -692,20 +692,9 @@ static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info,
   marker->erase();
 }
 
-static void genBodyOfTargetDataOp(
-    lower::AbstractConverter &converter, lower::SymMap &symTable,
-    semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
-    mlir::omp::TargetDataOp &dataOp, llvm::ArrayRef<mlir::Type> useDeviceTypes,
-    llvm::ArrayRef<mlir::Location> useDeviceLocs,
-    llvm::ArrayRef<const semantics::Symbol *> useDeviceSymbols,
-    const mlir::Location &currentLocation, const ConstructQueue &queue,
-    ConstructQueue::const_iterator item) {
-  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
-  mlir::Region &region = dataOp.getRegion();
-
-  auto *regionBlock =
-      firOpBuilder.createBlock(&region, {}, useDeviceTypes, useDeviceLocs);
-
+void mapBodySymbols(lower::AbstractConverter &converter, mlir::Region &region,
+                    mlir::Block *regionBlock,
+                    llvm::ArrayRef<const semantics::Symbol *> mapSyms) {
   // Clones the `bounds` placing them inside the target region and returns them.
   auto cloneBound = [&](mlir::Value bound) {
     if (mlir::isMemoryEffectFree(bound.getDefiningOp())) {
@@ -724,19 +713,19 @@ static void genBodyOfTargetDataOp(
     return clonedBounds;
   };
 
-  for (auto [argIndex, argSymbol] : llvm::enumerate(useDeviceSymbols)) {
-    const mlir::BlockArgument &arg = region.front().getArgument(argIndex);
-    fir::ExtendedValue extVal = converter.getSymbolExtendedValue(*argSymbol);
+  // Bind the symbols to their corresponding block arguments.
+  for (auto [argIndex, argSymbol] : llvm::enumerate(mapSyms)) {
+    const mlir::BlockArgument &arg = region.getArgument(argIndex);
+    // Avoid capture of a reference to a structured binding.
+    const semantics::Symbol *sym = argSymbol;
+    // Structure component symbols don't have bindings.
+    if (sym->owner().IsDerivedType())
+      continue;
+    fir::ExtendedValue extVal = converter.getSymbolExtendedValue(*sym);
     auto refType = mlir::dyn_cast<fir::ReferenceType>(arg.getType());
     if (refType && fir::isa_builtin_cptr_type(refType.getElementType())) {
       converter.bindSymbol(*argSymbol, arg);
     } else {
-      // Avoid capture of a reference to a structured binding.
-      const Fortran::semantics::Symbol *sym = argSymbol;
-      // Structure component symbols don't have bindings.
-      if (sym->owner().IsDerivedType())
-        continue;
-      fir::ExtendedValue extVal = converter.getSymbolExtendedValue(*sym);
       extVal.match(
           [&](const fir::BoxValue &v) {
             converter.bindSymbol(*sym,
@@ -772,14 +761,33 @@ static void genBodyOfTargetDataOp(
           });
     }
   }
+}
+
+static void genBodyOfTargetDataOp(
+    lower::AbstractConverter &converter, lower::SymMap &symTable,
+    semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
+    mlir::omp::TargetDataOp &dataOp,
+    llvm::ArrayRef<const semantics::Symbol *> useDeviceSymbols,
+    llvm::ArrayRef<mlir::Location> useDeviceLocs,
+    llvm::ArrayRef<mlir::Type> useDeviceTypes,
+    const mlir::Location &currentLocation, const ConstructQueue &queue,
+    ConstructQueue::const_iterator item) {
+  assert(useDeviceTypes.size() == useDeviceLocs.size());
+
+  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+  mlir::Region &region = dataOp.getRegion();
+  mlir::Block *regionBlock = nullptr;
+  regionBlock =
+      firOpBuilder.createBlock(&region, {}, useDeviceTypes, useDeviceLocs);
+
+  mapBodySymbols(converter, region, regionBlock, useDeviceSymbols);
 
   // Insert dummy instruction to remember the insertion position. The
-  // marker will be deleted by clean up passes since there are no uses.
-  // Remembering the position for further insertion is important since
-  // there are hlfir.declares inserted above while setting block arguments
-  // and new code from the body should be inserted after that.
+  // marker will be deleted since there are not uses.
+  // In the HLFIR flow there are hlfir.declares inserted above while
+  // setting block arguments.
   mlir::Value undefMarker = firOpBuilder.create<fir::UndefOp>(
-      dataOp.getOperation()->getLoc(), firOpBuilder.getIndexType());
+      dataOp.getLoc(), firOpBuilder.getIndexType());
 
   // Create blocks for unstructured regions. This has to be done since
   // blocks are initially allocated with the function as the parent region.
@@ -790,7 +798,7 @@ static void genBodyOfTargetDataOp(
 
   firOpBuilder.create<mlir::omp::TerminatorOp>(currentLocation);
 
-  // Set the insertion point after the marker.
+  // Create the insertion point after the marker.
   firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp());
 
   if (ConstructQueue::const_iterator next = std::next(item);
@@ -831,183 +839,106 @@ static void genIntermediateCommonBlockAccessors(
 
 // This functions creates a block for the body of the targetOp's region. It adds
 // all the symbols present in mapSymbols as block arguments to this block.
-static void genBodyOfTargetAndDataOp(
+static void genBodyOfTargetOp(
     lower::AbstractConverter &converter, lower::SymMap &symTable,
     semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
-    mlir::Operation &op, llvm::ArrayRef<const semantics::Symbol *> mapSyms,
+    mlir::omp::TargetOp &targetOp,
+    llvm::ArrayRef<const semantics::Symbol *> mapSyms,
     llvm::ArrayRef<mlir::Location> mapSymLocs,
     llvm::ArrayRef<mlir::Type> mapSymTypes,
     const mlir::Location &currentLocation, const ConstructQueue &queue,
-    ConstructQueue::const_iterator item, DataSharingProcessor *dsp = nullptr,
-    bool isTargetOp = true) {
+    ConstructQueue::const_iterator item, DataSharingProcessor &dsp) {
   assert(mapSymTypes.size() == mapSymLocs.size());
 
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
-  mlir::Region &region = op.getRegion(0);
+  mlir::Region &region = targetOp.getRegion();
 
   mlir::Block *regionBlock = nullptr;
-  if (isTargetOp) {
-    llvm::SmallVector<mlir::Type> allRegionArgTypes;
-    llvm::SmallVector<mlir::Location> allRegionArgLocs;
-    mergePrivateVarsInfo(mlir::cast<mlir::omp::TargetOp>(op), mapSymTypes,
-                         llvm::function_ref<mlir::Type(mlir::Value)>{
-                             [](mlir::Value v) { return v.getType(); }},
-                         allRegionArgTypes);
+  llvm::SmallVector<mlir::Type> allRegionArgTypes;
+  llvm::SmallVector<mlir::Location> allRegionArgLocs;
+  mergePrivateVarsInfo(mlir::cast<mlir::omp::TargetOp>(targetOp), mapSymTypes,
+                       llvm::function_ref<mlir::Type(mlir::Value)>{
+                           [](mlir::Value v) { return v.getType(); }},
+                       allRegionArgTypes);
 
-    mergePrivateVarsInfo(mlir::cast<mlir::omp::TargetOp>(op), mapSymLocs,
-                         llvm::function_ref<mlir::Location(mlir::Value)>{
-                             [](mlir::Value v) { return v.getLoc(); }},
-                         allRegionArgLocs);
+  mergePrivateVarsInfo(mlir::cast<mlir::omp::TargetOp>(targetOp), mapSymLocs,
+                       llvm::function_ref<mlir::Location(mlir::Value)>{
+                           [](mlir::Value v) { return v.getLoc(); }},
+                       allRegionArgLocs);
 
-    regionBlock = firOpBuilder.createBlock(&region, {}, allRegionArgTypes,
-                                           allRegionArgLocs);
-  } else {
-    regionBlock =
-        firOpBuilder.createBlock(&region, {}, mapSymTypes, mapSymLocs);
-  }
+  regionBlock = firOpBuilder.createBlock(&region, {}, allRegionArgTypes,
+                                         allRegionArgLocs);
 
-  // Clones the `bounds` placing them inside the target region and returns them.
-  auto cloneBound = [&](mlir::Value bound) {
-    if (mlir::isMemoryEffectFree(bound.getDefiningOp())) {
-      mlir::Operation *clonedOp = bound.getDefiningOp()->clone();
-      regionBlock->push_back(clonedOp);
-      return clonedOp->getResult(0);
-    }
-    TODO(converter.getCurrentLocation(),
-         "target map clause operand unsupported bound type");
-  };
+  mapBodySymbols(converter, region, regionBlock, mapSyms);
 
-  auto cloneBounds = [cloneBound](llvm::ArrayRef<mlir::Value> bounds) {
-    llvm::SmallVector<mlir::Value> clonedBounds;
-    for (mlir::Value bound : bounds)
-      clonedBounds.emplace_back(cloneBound(bound));
-    return clonedBounds;
-  };
+  for (auto [argIndex, argSymbol] :
+       llvm::enumerate(dsp.getAllSymbolsToPrivatize())) {
+    argIndex = mapSyms.size() + argIndex;
 
-  // Bind the symbols to their corresponding block arguments.
-  for (auto [argIndex, argSymbol] : llvm::enumerate(mapSyms)) {
     const mlir::BlockArgument &arg = region.getArgument(argIndex);
-    // Avoid capture of a reference to a structured binding.
-    const semantics::Symbol *sym = argSymbol;
-    // Structure component symbols don't have bindings.
-    if (sym->owner().IsDerivedType())
-      continue;
-    fir::ExtendedValue extVal = converter.getSymbolExtendedValue(*sym);
-    auto refType = mlir::dyn_cast<fir::ReferenceType>(arg.getType());
-    if (!isTargetOp && refType &&
-        fir::isa_builtin_cptr_type(refType.getElementType())) {
-      converter.bindSymbol(*argSymbol, arg);
-    } else {
-      extVal.match(
-          [&](const fir::BoxValue &v) {
-            converter.bindSymbol(*sym,
-                                 fir::BoxValue(arg, cloneBounds(v.getLBounds()),
-                                               v.getExplicitParameters(),
-                                               v.getExplicitExtents()));
-          },
-          [&](const fir::MutableBoxValue &v) {
-            converter.bindSymbol(
-                *sym, fir::MutableBoxValue(arg, cloneBounds(v.getLBounds()),
-                                           v.getMutableProperties()));
-          },
-          [&](const fir::ArrayBoxValue &v) {
-            converter.bindSymbol(
-                *sym, fir::ArrayBoxValue(arg, cloneBounds(v.getExtents()),
-                                         cloneBounds(v.getLBounds()),
-                                         v.getSourceBox()));
-          },
-          [&](const fir::CharArrayBoxValue &v) {
-            converter.bindSymbol(
-                *sym, fir::CharArrayBoxValue(arg, cloneBound(v.getLen()),
-                                             cloneBounds(v.getExtents()),
-                                             cloneBounds(v.getLBounds())));
-          },
-          [&](const fir::CharBoxValue &v) {
-            converter.bindSymbol(
-                *sym, fir::CharBoxValue(arg, cloneBound(v.getLen())));
-          },
-          [&](const fir::UnboxedValue &v) { converter.bindSymbol(*sym, arg); },
-          [&](const auto &) {
-            TODO(converter.getCurrentLocation(),
-                 "target map clause operand unsupported type");
-          });
-    }
+    converter.bindSymbol(*argSymbol,
+                         hlfir::translateToExtendedValue(
+                             currentLocation, firOpBuilder, hlfir::Entity{arg},
+                             /*contiguousHint=*/
+                             evaluate::IsSimplyContiguous(
+                                 *argSymbol, converter.getFoldingContext()))
+                             .first);
   }
 
-  if (isTargetOp) {
-    for (auto [argIndex, argSymbol] :
-         llvm::enumerate(dsp->getAllSymbolsToPrivatize())) {
-      argIndex = mapSyms.size() + argIndex;
-
-      const mlir::BlockArgument &arg = region.getArgument(argIndex);
-      converter.bindSymbol(
-          *argSymbol, hlfir::translateToExtendedValue(
-                          currentLocation, firOpBuilder, hlfir::Entity{arg},
-                          /*contiguousHint=*/
-                          evaluate::IsSimplyContiguous(
-                              *argSymbol, converter.getFoldingContext()))
-                          .first);
-    }
+  // Check if cloning the bounds introduced any dependency on the outer
+  // region. If so, then either clone them as well if they are
+  // MemoryEffectFree, or else copy them to a new temporary and add them to
+  // the map and block_argument lists and replace their uses with the new
+  // temporary.
+  llvm::SetVector<mlir::Value> valuesDefinedAbove;
+  mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove);
+  while (!valuesDefinedAbove.empty()) {
+    for (mlir::Value val : valuesDefinedAbove) {
+      mlir::Operation *valOp = val.getDefiningOp();
+      if (mlir::isMemoryEffectFree(valOp)) {
+        mlir::Operation *clonedOp = valOp->clone();
+        regionBlock->push_front(clonedOp);
+        val.replaceUsesWithIf(
+            clonedOp->getResult(0), [regionBlock](mlir::OpOperand &use) {
+              return use.getOwner()->getBlock() == regionBlock;
+            });
+      } else {
+        auto savedIP = firOpBuilder.getInsertionPoint();
+        firOpBuilder.setInsertionPointAfter(valOp);
+        auto copyVal =
+            firOpBuilder.createTemporary(val.getLoc(), val.getType());
+        firOpBuilder.createStoreWithConvert(copyVal.getLoc(), val, copyVal);
 
-    // Check if cloning the bounds introduced any dependency on the outer
-    // region. If so, then either clone them as well if they are
-    // MemoryEffectFree, or else copy them to a new temporary and add them to
-    // the map and block_argument lists and replace their uses with the new
-    // temporary.
-    llvm::SetVector<mlir::Value> valuesDefinedAbove;
-    mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove);
-    while (!valuesDefinedAbove.empty()) {
-      for (mlir::Value val : valuesDefinedAbove) {
-        mlir::Operation *valOp = val.getDefiningOp();
-        if (mlir::isMemoryEffectFree(valOp)) {
-          mlir::Operation *clonedOp = valOp->clone();
-          regionBlock->push_front(clonedOp);
-          val.replaceUsesWithIf(
-              clonedOp->getResult(0), [regionBlock](mlir::OpOperand &use) {
-                return use.getOwner()->getBlock() == regionBlock;
-              });
-        } else {
-          auto savedIP = firOpBuilder.getInsertionPoint();
-          firOpBuilder.setInsertionPointAfter(valOp);
-          auto copyVal =
-              firOpBuilder.createTemporary(val.getLoc(), val.getType());
-          firOpBuilder.createStoreWithConvert(copyVal.getLoc(), val, copyVal);
-
-          llvm::SmallVector<mlir::Value> bounds;
-          std::stringstream name;
-          firOpBuilder.setInsertionPoint(&op);
-          mlir::Value mapOp = createMapInfoOp(
-              firOpBuilder, copyVal.getLoc(), copyVal,
-              /*varPtrPtr=*/mlir::Value{}, name.str(), bounds,
-              /*members=*/llvm::SmallVector<mlir::Value>{},
-              /*membersIndex=*/mlir::DenseIntElementsAttr{},
-              static_cast<
-                  std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
-                  llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT),
-              mlir::omp::VariableCaptureKind::ByCopy, copyVal.getType());
-
-          if (isTargetOp)
-            mlir::cast<mlir::omp::TargetOp>(op).getMapVarsMutable().append(
-                mapOp);
-          else
-            mlir::cast<mlir::omp::TargetDataOp>(op).getMapVarsMutable().append(
-                mapOp);
-
-          mlir::Value clonedValArg =
-              region.addArgument(copyVal.getType(), copyVal.getLoc());
-          firOpBuilder.setInsertionPointToStart(regionBlock);
-          auto loadOp = firOpBuilder.create<fir::LoadOp>(clonedValArg.getLoc(),
-                                                         clonedValArg);
-          val.replaceUsesWithIf(
-              loadOp->getResult(0), [regionBlock](mlir::OpOperand &use) {
-                return use.getOwner()->getBlock() == regionBlock;
-              });
-          firOpBuilder.setInsertionPoint(regionBlock, savedIP);
-        }
+        llvm::SmallVector<mlir::Value> bounds;
+        std::stringstream name;
+        firOpBuilder.setInsertionPoint(targetOp);
+        mlir::Value mapOp = createMapInfoOp(
+            firOpBuilder, copyVal.getLoc(), copyVal,
+            /*varPtrPtr=*/mlir::Value{}, name.str(), bounds,
+            /*members=*/llvm::SmallVector<mlir::Value>{},
+            /*membersIndex=*/mlir::DenseIntElementsAttr{},
+            static_cast<
+                std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
+                llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT),
+            mlir::omp::VariableCaptureKind::ByCopy, copyVal.getType());
+
+        mlir::cast<mlir::omp::TargetOp>(targetOp).getMapVarsMutable().append(
+            mapOp);
+
+        mlir::Value clonedValArg =
+            region.addArgument(copyVal.getType(), copyVal.getLoc());
+        firOpBuilder.setInsertionPointToStart(regionBlock);
+        auto loadOp = firOpBuilder.create<fir::LoadOp>(clonedValArg.getLoc(),
+                                                       clonedValArg);
+        val.replaceUsesWithIf(
+            loadOp->getResult(0), [regionBlock](mlir::OpOperand &use) {
+              return use.getOwner()->getBlock() == regionBlock;
+            });
+        firOpBuilder.setInsertionPoint(regionBlock, savedIP);
       }
-      valuesDefinedAbove.clear();
-      mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove);
     }
+    valuesDefinedAbove.clear();
+    mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove);
   }
 
   // Insert dummy instruction to remember the insertion position. The
@@ -1015,7 +946,7 @@ static void genBodyOfTargetAndDataOp(
   // In the HLFIR flow there are hlfir.declares inserted above while
   // setting block arguments.
   mlir::Value undefMarker = firOpBuilder.create<fir::UndefOp>(
-      op.getLoc(), firOpBuilder.getIndexType());
+      targetOp.getLoc(), firOpBuilder.getIndexType());
 
   // Create blocks for unstructured regions. This has to be done since
   // blocks are initially allocated with the function as the parent region.
@@ -1036,9 +967,8 @@ static void genBodyOfTargetAndDataOp(
   // within the region, binding them to the member symbol for the scope of the
   // region so that subsequent code generation within the region will utilise
   // our new member accesses we have created.
-  if (isTargetOp)
-    genIntermediateCommonBlockAccessors(converter, currentLocation, region,
-                                        mapSyms);
+  genIntermediateCommonBlockAccessors(converter, currentLocation, region,
+                                      mapSyms);
 
   if (ConstructQueue::const_iterator next = std::next(item);
       next != queue.end()) {
@@ -1048,8 +978,7 @@ static void genBodyOfTargetAndDataOp(
     genNestedEvaluations(converter, eval);
   }
 
-  if (isTargetOp)
-    dsp->processStep2(mlir::cast<mlir::omp::TargetOp>(op), /*isLoop=*/false);
+  dsp.processStep2(mlir::cast<mlir::omp::TargetOp>(targetOp), /*isLoop=*/false);
 }
 
 template <typename OpTy, typename... Args>
@@ -1852,9 +1781,8 @@ genTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
   lower::pft::visitAllSymbols(eval, captureImplicitMap);
 
   auto targetOp = firOpBuilder.create<mlir::omp::TargetOp>(loc, clauseOps);
-  genBodyOfTargetAndDataOp(converter, symTable, semaCtx, eval,
-                           *targetOp.getOperation(), mapSyms, mapLocs, mapTypes,
-                           loc, queue, item, &dsp);
+  genBodyOfTargetOp(converter, symTable, semaCtx, eval, targetOp, mapSyms,
+                    mapLocs, mapTypes, loc, queue, item, dsp);
   return targetOp;
 }
 
@@ -1875,10 +1803,9 @@ genTargetDataOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
   auto targetDataOp =
       converter.getFirOpBuilder().create<mlir::omp::TargetDataOp>(loc,
                                                                   clauseOps);
-  genBodyOfTargetAndDataOp(converter, symTable, semaCtx, eval,
-                           *targetDataOp.getOperation(), useDeviceSyms,
-                           useDeviceLocs, useDeviceTypes, loc, queue, item,
-                           /*dsp=*/nullptr, /*isTargetOp=*/false);
+  genBodyOfTargetDataOp(converter, symTable, semaCtx, eval, targetDataOp,
+                        useDeviceSyms, useDeviceLocs, useDeviceTypes, loc,
+                        queue, item);
   return targetDataOp;
 }
 



More information about the flang-commits mailing list