[flang-commits] [flang] [OpenMP][MLIR] OMPEarlyOutliningPass removal (PR #67319)

Akash Banerjee via flang-commits flang-commits at lists.llvm.org
Mon Oct 23 10:31:23 PDT 2023


https://github.com/TIFitis updated https://github.com/llvm/llvm-project/pull/67319

>From 5b4ff743cce5fe255b6483f3ed8240eef00158ca Mon Sep 17 00:00:00 2001
From: Akash Banerjee <Akash.Banerjee at amd.com>
Date: Fri, 22 Sep 2023 18:12:06 +0100
Subject: [PATCH 1/3] [OpenMP][Flang] Add "IsolatedFromAbove" trait to
 omp.target

This patch adds the PFT lowering changes required for adding the IsolatedFromAbove and OutlineableOpenMPOpInterface traits to omp.target.

Key Changes:
	- Add IsolatedFromAbove and OutlineableOpenMPOpInterface traits to target op in MLIR.
	- Main reason for this change is to prevent CSE and other similar optimisations from crossing region boundaries for target operations. The link below has the discourse discussion surrounding this issue.
	- Move implicit operand capturing to the PFT lowering stage.
	- Update related tests.

Related discussion: https://discourse.llvm.org/t/rfc-prevent-cse-from-removing-expressions-inside-some-non-isolatedfromabove-operation-regions/73150
---
 flang/lib/Lower/OpenMP.cpp                    | 253 +++++++++++++++---
 .../Fir/convert-to-llvm-openmp-and-fir.fir    |  10 +-
 flang/test/Lower/OpenMP/FIR/array-bounds.f90  | 119 +++-----
 flang/test/Lower/OpenMP/FIR/location.f90      |   2 +-
 flang/test/Lower/OpenMP/FIR/target.f90        | 106 ++++++--
 flang/test/Lower/OpenMP/array-bounds.f90      |   6 +-
 flang/test/Lower/OpenMP/location.f90          |   2 +-
 .../test/Lower/OpenMP/map-types-and-sizes.f90 |  57 ++++
 flang/test/Lower/OpenMP/target-map_types.f90  |  57 ++++
 flang/test/Lower/OpenMP/target.f90            | 113 ++++++++
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td |  33 +--
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  |  76 ++++++
 12 files changed, 685 insertions(+), 149 deletions(-)
 create mode 100644 flang/test/Lower/OpenMP/map-types-and-sizes.f90
 create mode 100644 flang/test/Lower/OpenMP/target-map_types.f90
 create mode 100644 flang/test/Lower/OpenMP/target.f90

diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp
index 5f5e968eaaa6414..68f2c036e2f5132 100644
--- a/flang/lib/Lower/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP.cpp
@@ -534,11 +534,21 @@ class ClauseProcessor {
             mlir::Value &result) const;
   bool
   processLink(llvm::SmallVectorImpl<DeclareTargetCapturePair> &result) const;
+
+  // This method is used to process a map clause.
+  // The optional parameters - mapSymTypes, mapSymLocs & mapSymbols are used to
+  // store the original type, location and Fortran symbol for the map operands.
+  // They may be used later on to create the block_arguments for some of the
+  // target directives that require it.
   bool processMap(mlir::Location currentLocation,
                   const llvm::omp::Directive &directive,
                   Fortran::semantics::SemanticsContext &semanticsContext,
                   Fortran::lower::StatementContext &stmtCtx,
-                  llvm::SmallVectorImpl<mlir::Value> &mapOperands) const;
+                  llvm::SmallVectorImpl<mlir::Value> &mapOperands,
+                  llvm::SmallVectorImpl<mlir::Type> *mapSymTypes = nullptr,
+                  llvm::SmallVectorImpl<mlir::Location> *mapSymLocs = nullptr,
+                  llvm::SmallVectorImpl<const Fortran::semantics::Symbol *>
+                      *mapSymbols = nullptr) const;
   bool processReduction(
       mlir::Location currentLocation,
       llvm::SmallVectorImpl<mlir::Value> &reductionVars,
@@ -1665,29 +1675,25 @@ static mlir::omp::MapInfoOp
 createMapInfoOp(fir::FirOpBuilder &builder, mlir::Location loc,
                 mlir::Value baseAddr, std::stringstream &name,
                 mlir::SmallVector<mlir::Value> bounds, uint64_t mapType,
-                mlir::omp::VariableCaptureKind mapCaptureType, bool implicit,
-                mlir::Type retTy) {
-  mlir::Value varPtrPtr;
+                mlir::omp::VariableCaptureKind mapCaptureType, mlir::Type retTy,
+                bool isVal = false) {
+  mlir::Value val, varPtr, varPtrPtr;
+
   if (auto boxTy = baseAddr.getType().dyn_cast<fir::BaseBoxType>()) {
     baseAddr = builder.create<fir::BoxAddrOp>(loc, baseAddr);
     retTy = baseAddr.getType();
   }
 
-  mlir::omp::MapInfoOp op =
-      builder.create<mlir::omp::MapInfoOp>(loc, retTy, baseAddr);
-  op.setNameAttr(builder.getStringAttr(name.str()));
-  op.setImplicit(implicit);
-  op.setMapType(mapType);
-  op.setMapCaptureType(mapCaptureType);
-
-  unsigned insPos = 1;
-  if (varPtrPtr)
-    op->insertOperands(insPos++, varPtrPtr);
-  if (bounds.size() > 0)
-    op->insertOperands(insPos, bounds);
-  op->setAttr(mlir::omp::MapInfoOp::getOperandSegmentSizeAttr(),
-              builder.getDenseI32ArrayAttr(
-                  {1, varPtrPtr ? 1 : 0, static_cast<int32_t>(bounds.size())}));
+  if (isVal)
+    val = baseAddr;
+  else
+    varPtr = baseAddr;
+
+  mlir::omp::MapInfoOp op = builder.create<mlir::omp::MapInfoOp>(
+      loc, retTy, val, varPtr, varPtrPtr, bounds,
+      builder.getIntegerAttr(builder.getIntegerType(64, false), mapType),
+      builder.getAttr<mlir::omp::VariableCaptureKindAttr>(mapCaptureType),
+      builder.getStringAttr(name.str()));
   return op;
 }
 
@@ -1695,7 +1701,11 @@ bool ClauseProcessor::processMap(
     mlir::Location currentLocation, const llvm::omp::Directive &directive,
     Fortran::semantics::SemanticsContext &semanticsContext,
     Fortran::lower::StatementContext &stmtCtx,
-    llvm::SmallVectorImpl<mlir::Value> &mapOperands) const {
+    llvm::SmallVectorImpl<mlir::Value> &mapOperands,
+    llvm::SmallVectorImpl<mlir::Type> *mapSymTypes,
+    llvm::SmallVectorImpl<mlir::Location> *mapSymLocs,
+    llvm::SmallVectorImpl<const Fortran::semantics::Symbol *> *mapSymbols)
+    const {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   return findRepeatableClause<ClauseTy::Map>(
       [&](const ClauseTy::Map *mapClause,
@@ -1755,13 +1765,20 @@ bool ClauseProcessor::processMap(
           // Explicit map captures are captured ByRef by default,
           // optimisation passes may alter this to ByCopy or other capture
           // types to optimise
-          mapOperands.push_back(createMapInfoOp(
+          mlir::Value mapOp = createMapInfoOp(
               firOpBuilder, clauseLocation, baseAddr, asFortran, bounds,
               static_cast<
                   std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
                   mapTypeBits),
-              mlir::omp::VariableCaptureKind::ByRef, false,
-              baseAddr.getType()));
+              mlir::omp::VariableCaptureKind::ByRef, baseAddr.getType());
+
+          mapOperands.push_back(mapOp);
+          if (mapSymTypes)
+            mapSymTypes->push_back(baseAddr.getType());
+          if (mapSymLocs)
+            mapSymLocs->push_back(baseAddr.getLoc());
+          if (mapSymbols)
+            mapSymbols->push_back(getOmpObjectSymbol(ompObject));
         }
       });
 }
@@ -2142,7 +2159,7 @@ static void createBodyOfOp(
   }
 }
 
-static void createBodyOfTargetDataOp(
+static void genBodyOfTargetDataOp(
     Fortran::lower::AbstractConverter &converter, mlir::omp::DataOp &dataOp,
     const llvm::SmallVector<mlir::Type> &useDeviceTypes,
     const llvm::SmallVector<mlir::Location> &useDeviceLocs,
@@ -2356,8 +2373,8 @@ genDataOp(Fortran::lower::AbstractConverter &converter,
   auto dataOp = converter.getFirOpBuilder().create<mlir::omp::DataOp>(
       currentLocation, ifClauseOperand, deviceOperand, devicePtrOperands,
       deviceAddrOperands, mapOperands);
-  createBodyOfTargetDataOp(converter, dataOp, useDeviceTypes, useDeviceLocs,
-                           useDeviceSymbols, currentLocation);
+  genBodyOfTargetDataOp(converter, dataOp, useDeviceTypes, useDeviceLocs,
+                        useDeviceSymbols, currentLocation);
   return dataOp;
 }
 
@@ -2400,6 +2417,76 @@ genEnterExitDataOp(Fortran::lower::AbstractConverter &converter,
                                    deviceOperand, nowaitAttr, mapOperands);
 }
 
+static void genBodyOfTargetOp(
+    Fortran::lower::AbstractConverter &converter, mlir::omp::TargetOp &targetOp,
+    const llvm::SmallVector<mlir::Type> &mapSymTypes,
+    const llvm::SmallVector<mlir::Location> &mapSymLocs,
+    const llvm::SmallVector<const Fortran::semantics::Symbol *> &mapSymbols,
+    const mlir::Location &currentLocation) {
+  assert(mapSymTypes.size() == mapSymLocs.size());
+
+  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+  mlir::Region &region = targetOp.getRegion();
+
+  firOpBuilder.createBlock(&region, {}, mapSymTypes, mapSymLocs);
+  firOpBuilder.create<mlir::omp::TerminatorOp>(currentLocation);
+  firOpBuilder.setInsertionPointToStart(&region.front());
+
+  unsigned argIndex = 0;
+  unsigned blockArgsIndex = mapSymbols.size();
+
+  auto extractBoundArgs = [&](auto n) {
+    llvm::SmallVector<mlir::Value> argExtents;
+    while (n--) {
+      argExtents.push_back(fir::getBase(region.getArgument(blockArgsIndex)));
+      blockArgsIndex++;
+    }
+    return argExtents;
+  };
+
+  for (const Fortran::semantics::Symbol *sym : mapSymbols) {
+    const mlir::BlockArgument &arg = region.getArgument(argIndex);
+    fir::ExtendedValue extVal = converter.getSymbolExtendedValue(*sym);
+    mlir::Value val = fir::getBase(arg);
+    extVal.match(
+        [&](const fir::BoxValue &v) {
+          converter.bindSymbol(
+              *sym,
+              fir::BoxValue(val, extractBoundArgs(v.getLBounds().size()),
+                            v.getExplicitParameters(), v.getExplicitExtents()));
+        },
+        [&](const fir::MutableBoxValue &v) {
+          converter.bindSymbol(
+              *sym,
+              fir::MutableBoxValue(val, extractBoundArgs(v.getLBounds().size()),
+                                   v.getMutableProperties()));
+        },
+        [&](const fir::ArrayBoxValue &v) {
+          converter.bindSymbol(
+              *sym,
+              fir::ArrayBoxValue(val, extractBoundArgs(v.getExtents().size()),
+                                 extractBoundArgs(v.getLBounds().size()),
+                                 v.getSourceBox()));
+        },
+        [&](const fir::CharArrayBoxValue &v) {
+          converter.bindSymbol(
+              *sym,
+              fir::CharArrayBoxValue(val, v.getLen(),
+                                     extractBoundArgs(v.getExtents().size()),
+                                     extractBoundArgs(v.getLBounds().size())));
+        },
+        [&](const fir::CharBoxValue &v) {
+          converter.bindSymbol(*sym, fir::CharBoxValue(val, v.getLen()));
+        },
+        [&](const fir::UnboxedValue &v) { converter.bindSymbol(*sym, val); },
+        [&](const auto &) {
+          TODO(converter.getCurrentLocation(),
+               "target map clause operand unsupported type");
+        });
+    argIndex++;
+  }
+}
+
 static mlir::omp::TargetOp
 genTargetOp(Fortran::lower::AbstractConverter &converter,
             Fortran::lower::pft::Evaluation &eval,
@@ -2411,6 +2498,9 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
   mlir::Value ifClauseOperand, deviceOperand, threadLimitOperand;
   mlir::UnitAttr nowaitAttr;
   llvm::SmallVector<mlir::Value> mapOperands;
+  llvm::SmallVector<mlir::Type> mapSymTypes;
+  llvm::SmallVector<mlir::Location> mapSymLocs;
+  llvm::SmallVector<const Fortran::semantics::Symbol *> mapSymbols;
 
   ClauseProcessor cp(converter, clauseList);
   cp.processIf(stmtCtx,
@@ -2420,7 +2510,7 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
   cp.processThreadLimit(stmtCtx, threadLimitOperand);
   cp.processNowait(nowaitAttr);
   cp.processMap(currentLocation, directive, semanticsContext, stmtCtx,
-                mapOperands);
+                mapOperands, &mapSymTypes, &mapSymLocs, &mapSymbols);
   cp.processTODO<Fortran::parser::OmpClause::Private,
                  Fortran::parser::OmpClause::Depend,
                  Fortran::parser::OmpClause::Firstprivate,
@@ -2433,10 +2523,111 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
                  Fortran::parser::OmpClause::Defaultmap>(
       currentLocation, llvm::omp::Directive::OMPD_target);
 
-  return genOpWithBody<mlir::omp::TargetOp>(
-      converter, eval, currentLocation, outerCombined, &clauseList,
-      ifClauseOperand, deviceOperand, threadLimitOperand, nowaitAttr,
-      mapOperands);
+  auto captureImplicitMap = [&](const Fortran::semantics::Symbol &sym) {
+    if (llvm::find(mapSymbols, &sym) == mapSymbols.end()) {
+      mlir::Value baseOp = converter.getSymbolAddress(sym);
+      if (!baseOp)
+        if (const auto *details = sym.template detailsIf<
+                                  Fortran::semantics::HostAssocDetails>()) {
+          baseOp = converter.getSymbolAddress(details->symbol());
+          converter.copySymbolBinding(details->symbol(), sym);
+        }
+
+      if (baseOp) {
+        llvm::SmallVector<mlir::Value> bounds;
+        std::stringstream name;
+        fir::ExtendedValue dataExv = converter.getSymbolExtendedValue(sym);
+        name << sym.name().ToString();
+
+        mlir::Value baseAddr =
+            getDataOperandBaseAddr(converter, converter.getFirOpBuilder(), sym,
+                                   converter.getCurrentLocation());
+        if (fir::unwrapRefType(baseAddr.getType()).isa<fir::BaseBoxType>())
+          bounds =
+              Fortran::lower::genBoundsOpsFromBox<mlir::omp::DataBoundsOp,
+                                                  mlir::omp::DataBoundsType>(
+                  converter.getFirOpBuilder(), converter.getCurrentLocation(),
+                  converter, dataExv, baseAddr);
+        if (fir::unwrapRefType(baseAddr.getType()).isa<fir::SequenceType>())
+          bounds = Fortran::lower::genBaseBoundsOps<mlir::omp::DataBoundsOp,
+                                                    mlir::omp::DataBoundsType>(
+              converter.getFirOpBuilder(), converter.getCurrentLocation(),
+              converter, dataExv, baseAddr);
+
+        llvm::omp::OpenMPOffloadMappingFlags mapFlag =
+            llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
+        if (auto refType = baseOp.getType().dyn_cast<fir::ReferenceType>()) {
+          auto eleType = refType.getElementType();
+          if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) {
+            mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
+          } else if (fir::isa_box_type(eleType)) {
+            mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
+            mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
+          }
+        }
+
+        mlir::Value mapOp = createMapInfoOp(
+            converter.getFirOpBuilder(), baseOp.getLoc(), baseOp, name, bounds,
+            static_cast<
+                std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
+                mapFlag),
+            mlir::omp::VariableCaptureKind::ByCopy, baseOp.getType());
+
+        mapOperands.push_back(mapOp);
+        mapSymTypes.push_back(baseOp.getType());
+        mapSymLocs.push_back(baseOp.getLoc());
+        mapSymbols.push_back(&sym);
+      }
+    }
+  };
+  Fortran::lower::pft::visitAllSymbols(eval, captureImplicitMap);
+
+  // Add the bounds and extents for box values to mapOperands
+  auto addBoundsInfo = [&](const auto &bounds) {
+    for (auto &val : bounds) {
+      mapSymLocs.push_back(val.getLoc());
+      mapSymTypes.push_back(val.getType());
+
+      llvm::SmallVector<mlir::Value> bounds;
+      std::stringstream name;
+
+      mlir::Value mapOp = createMapInfoOp(
+          converter.getFirOpBuilder(), val.getLoc(), val, name, bounds,
+          static_cast<
+              std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
+              llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
+              llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT),
+          mlir::omp::VariableCaptureKind::ByCopy, val.getType(), true);
+      mapOperands.push_back(mapOp);
+    }
+  };
+
+  for (const Fortran::semantics::Symbol *sym : mapSymbols) {
+    fir::ExtendedValue extVal = converter.getSymbolExtendedValue(*sym);
+    extVal.match(
+        [&](const fir::BoxValue &v) { addBoundsInfo(v.getLBounds()); },
+        [&](const fir::MutableBoxValue &v) { addBoundsInfo(v.getLBounds()); },
+        [&](const fir::ArrayBoxValue &v) {
+          addBoundsInfo(v.getExtents());
+          addBoundsInfo(v.getLBounds());
+        },
+        [&](const fir::CharArrayBoxValue &v) {
+          addBoundsInfo(v.getExtents());
+          addBoundsInfo(v.getLBounds());
+        },
+        [&](const auto &) {
+          // Nothing to do for non-box values.
+        });
+  }
+
+  auto targetOp = converter.getFirOpBuilder().create<mlir::omp::TargetOp>(
+      currentLocation, ifClauseOperand, deviceOperand, threadLimitOperand,
+      nowaitAttr, mapOperands);
+
+  genBodyOfTargetOp(converter, targetOp, mapSymTypes, mapSymLocs, mapSymbols,
+                    currentLocation);
+
+  return targetOp;
 }
 
 static mlir::omp::TeamsOp
diff --git a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir
index ba1a15bf773d2ef..1123e07e6c690aa 100644
--- a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir
+++ b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir
@@ -433,12 +433,13 @@ func.func @_QPomp_target() {
   %1 = arith.subi %c512, %c1 : index
   %2 = omp.bounds   lower_bound(%c0 : index) upper_bound(%1 : index) extent(%c512 : index) stride(%c1 : index) start_idx(%c1 : index)
   %3 = omp.map_info var_ptr(%0 : !fir.ref<!fir.array<512xi32>>)   map_clauses(tofrom) capture(ByRef) bounds(%2) -> !fir.ref<!fir.array<512xi32>> {name = "a"}
-  omp.target   thread_limit(%c64_i32 : i32) map_entries(%3 : !fir.ref<!fir.array<512xi32>>) {
+  omp.target   thread_limit(%c64_i32 : i32) map_entries(%3 -> %arg0 : !fir.ref<!fir.array<512xi32>>) {
+    ^bb0(%arg0: !fir.ref<!fir.array<512xi32>>):
     %c10_i32 = arith.constant 10 : i32
     %c1_i64 = arith.constant 1 : i64
     %c1_i64_0 = arith.constant 1 : i64
     %4 = arith.subi %c1_i64, %c1_i64_0 : i64
-    %5 = fir.coordinate_of %0, %4 : (!fir.ref<!fir.array<512xi32>>, i64) -> !fir.ref<i32>
+    %5 = fir.coordinate_of %arg0, %4 : (!fir.ref<!fir.array<512xi32>>, i64) -> !fir.ref<i32>
     fir.store %c10_i32 to %5 : !fir.ref<i32>
     omp.terminator
   }
@@ -455,12 +456,13 @@ func.func @_QPomp_target() {
 // CHECK:           %[[UPPER:.*]] = llvm.mlir.constant(511 : index) : i64
 // CHECK:           %[[BOUNDS:.*]] = omp.bounds   lower_bound(%[[LOWER]] : i64) upper_bound(%[[UPPER]] : i64) extent(%[[EXTENT]] : i64) stride(%[[STRIDE]] : i64) start_idx(%[[STRIDE]] : i64)
 // CHECK:           %[[MAP:.*]] = omp.map_info var_ptr(%2 : !llvm.ptr<array<512 x i32>>)   map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !llvm.ptr<array<512 x i32>> {name = "a"}
-// CHECK:           omp.target   thread_limit(%[[VAL_2]] : i32) map_entries(%[[MAP]] : !llvm.ptr<array<512 x i32>>) {
+// CHECK:           omp.target   thread_limit(%[[VAL_2]] : i32) map_entries(%[[MAP]] -> %[[ARG_0:.*]] : !llvm.ptr<array<512 x i32>>) {
+// CHECK:           ^bb0(%[[ARG_0]]: !llvm.ptr<array<512 x i32>>):
 // CHECK:             %[[VAL_3:.*]] = llvm.mlir.constant(10 : i32) : i32
 // CHECK:             %[[VAL_4:.*]] = llvm.mlir.constant(1 : i64) : i64
 // CHECK:             %[[VAL_5:.*]] = llvm.mlir.constant(1 : i64) : i64
 // CHECK:             %[[VAL_6:.*]] = llvm.mlir.constant(0 : i64) : i64
-// CHECK:             %[[VAL_7:.*]] = llvm.getelementptr %[[VAL_1]][0, %[[VAL_6]]] : (!llvm.ptr<array<512 x i32>>, i64) -> !llvm.ptr<i32>
+// CHECK:             %[[VAL_7:.*]] = llvm.getelementptr %[[ARG_0]][0, %[[VAL_6]]] : (!llvm.ptr<array<512 x i32>>, i64) -> !llvm.ptr<i32>
 // CHECK:             llvm.store %[[VAL_3]], %[[VAL_7]] : !llvm.ptr<i32>
 // CHECK:             omp.terminator
 // CHECK:           }
diff --git a/flang/test/Lower/OpenMP/FIR/array-bounds.f90 b/flang/test/Lower/OpenMP/FIR/array-bounds.f90
index 748257f8dcc3024..c0e4c1f5af3bd5f 100644
--- a/flang/test/Lower/OpenMP/FIR/array-bounds.f90
+++ b/flang/test/Lower/OpenMP/FIR/array-bounds.f90
@@ -1,37 +1,22 @@
-!RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | FileCheck %s --check-prefixes HOST
-!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefixes DEVICE
+!RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | FileCheck %s --check-prefixes=HOST,ALL
+!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefixes=DEVICE,ALL
 
-!DEVICE-LABEL: func.func @_QPread_write_section_omp_outline_0(
-!DEVICE-SAME: %[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<!fir.array<10xi32>>, %[[ARG2:.*]]: !fir.ref<!fir.array<10xi32>>) attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>, omp.outline_parent_name = "_QPread_write_section"} {
-!DEVICE:  %[[C1:.*]] = arith.constant 1 : index
-!DEVICE:  %[[C2:.*]] = arith.constant 4 : index
-!DEVICE:  %[[C3:.*]] = arith.constant 1 : index
-!DEVICE:  %[[C4:.*]] = arith.constant 1 : index
-!DEVICE:  %[[BOUNDS0:.*]] = omp.bounds   lower_bound(%[[C1]] : index) upper_bound(%[[C2]] : index) stride(%[[C4]] : index) start_idx(%[[C4]] : index)
-!DEVICE:  %[[MAP0:.*]] = omp.map_info var_ptr(%[[ARG1]] : !fir.ref<!fir.array<10xi32>>)   map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS0]]) -> !fir.ref<!fir.array<10xi32>> {name = "sp_read(2:5)"}
-!DEVICE:  %[[C5:.*]] = arith.constant 1 : index
-!DEVICE:  %[[C6:.*]] = arith.constant 4 : index
-!DEVICE:  %[[C7:.*]] = arith.constant 1 : index
-!DEVICE:  %[[C8:.*]] = arith.constant 1 : index
-!DEVICE:  %[[BOUNDS1:.*]] = omp.bounds   lower_bound(%[[C5]] : index) upper_bound(%[[C6]] : index) stride(%[[C8]] : index) start_idx(%[[C8]] : index)
-!DEVICE:  %[[MAP1:.*]] = omp.map_info var_ptr(%[[ARG2]] : !fir.ref<!fir.array<10xi32>>)   map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS1]]) -> !fir.ref<!fir.array<10xi32>> {name = "sp_write(2:5)"}
-!DEVICE:  omp.target   map_entries(%[[MAP0]], %[[MAP1]] : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>) {
-
-!HOST-LABEL:  func.func @_QPread_write_section() {
-!HOST:  %0 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFread_write_sectionEi"}
-!HOST:  %[[READ:.*]] = fir.address_of(@_QFread_write_sectionEsp_read) : !fir.ref<!fir.array<10xi32>>
-!HOST:  %[[WRITE:.*]] = fir.address_of(@_QFread_write_sectionEsp_write) : !fir.ref<!fir.array<10xi32>>
-!HOST:  %[[C1:.*]] = arith.constant 1 : index
-!HOST:  %[[C2:.*]] = arith.constant 1 : index
-!HOST:  %[[C3:.*]] = arith.constant 4 : index
-!HOST:  %[[BOUNDS0:.*]] = omp.bounds   lower_bound(%[[C2]] : index) upper_bound(%[[C3]] : index) stride(%[[C1]] : index) start_idx(%[[C1]] : index)
-!HOST:  %[[MAP0:.*]] = omp.map_info var_ptr(%[[READ]] : !fir.ref<!fir.array<10xi32>>)   map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS0]]) -> !fir.ref<!fir.array<10xi32>> {name = "sp_read(2:5)"}
-!HOST:  %[[C4:.*]] = arith.constant 1 : index
-!HOST:  %[[C5:.*]] = arith.constant 1 : index
-!HOST:  %[[C6:.*]] = arith.constant 4 : index
-!HOST:  %[[BOUNDS1:.*]] = omp.bounds   lower_bound(%[[C5]] : index) upper_bound(%[[C6]] : index) stride(%[[C4]] : index) start_idx(%[[C4]] : index)
-!HOST:  %[[MAP1:.*]] = omp.map_info var_ptr(%[[WRITE]] : !fir.ref<!fir.array<10xi32>>)   map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS1]]) -> !fir.ref<!fir.array<10xi32>> {name = "sp_write(2:5)"}
-!HOST:  omp.target   map_entries(%[[MAP0]], %[[MAP1]] : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>) {
+!ALL-LABEL: func.func @_QPread_write_section(
+!ALL:  %[[ITER:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFread_write_sectionEi"}
+!ALL:  %[[READ:.*]] = fir.address_of(@_QFread_write_sectionEsp_read) : !fir.ref<!fir.array<10xi32>>
+!ALL:  %[[WRITE:.*]] = fir.address_of(@_QFread_write_sectionEsp_write) : !fir.ref<!fir.array<10xi32>>
+!ALL:  %[[C1:.*]] = arith.constant 1 : index
+!ALL:  %[[C2:.*]] = arith.constant 1 : index
+!ALL:  %[[C3:.*]] = arith.constant 4 : index
+!ALL:  %[[BOUNDS0:.*]] = omp.bounds   lower_bound(%[[C2]] : index) upper_bound(%[[C3]] : index) stride(%[[C1]] : index) start_idx(%[[C1]] : index)
+!ALL:  %[[MAP0:.*]] = omp.map_info var_ptr(%[[READ]] : !fir.ref<!fir.array<10xi32>>)   map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS0]]) -> !fir.ref<!fir.array<10xi32>> {name = "sp_read(2:5)"}
+!ALL:  %[[C4:.*]] = arith.constant 1 : index
+!ALL:  %[[C5:.*]] = arith.constant 1 : index
+!ALL:  %[[C6:.*]] = arith.constant 4 : index
+!ALL:  %[[BOUNDS1:.*]] = omp.bounds   lower_bound(%[[C5]] : index) upper_bound(%[[C6]] : index) stride(%[[C4]] : index) start_idx(%[[C4]] : index)
+!ALL:  %[[MAP1:.*]] = omp.map_info var_ptr(%[[WRITE]] : !fir.ref<!fir.array<10xi32>>)   map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS1]]) -> !fir.ref<!fir.array<10xi32>> {name = "sp_write(2:5)"}
+!ALL:  %[[MAP2:.*]] = omp.map_info var_ptr(%[[ITER]] : !fir.ref<i32>)   map_clauses(literal, implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref<i32> {name = "i"}
+!ALL: omp.target map_entries(%[[MAP0]] -> %{{.*}}, %[[MAP1]] -> %{{.*}}, %[[MAP2]] -> %{{.*}}, %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>, !fir.ref<i32>, index, index) {
 
 subroutine read_write_section()
     integer :: sp_read(10) = (/1,2,3,4,5,6,7,8,9,10/)
@@ -44,33 +29,22 @@ subroutine read_write_section()
 !$omp end target
 end subroutine read_write_section
 
-
 module assumed_array_routines
-    contains
-!DEVICE-LABEL: func.func @_QMassumed_array_routinesPassumed_shape_array_omp_outline_0(
-!DEVICE-SAME: %[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>>, %[[ARG2:.*]]: !fir.ref<!fir.array<?xi32>>) attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>, omp.outline_parent_name = "_QMassumed_array_routinesPassumed_shape_array"} {
-!DEVICE: %[[C0:.*]] = arith.constant 1 : index
-!DEVICE: %[[C1:.*]] = arith.constant 4 : index
-!DEVICE: %[[C2:.*]] = arith.constant 0 : index
-!DEVICE: %[[C3:.*]]:3 = fir.box_dims %[[ARG1]], %[[C2]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-!DEVICE: %[[C4:.*]] = arith.constant 1 : index
-!DEVICE: %[[BOUNDS:.*]] = omp.bounds   lower_bound(%[[C0]] : index) upper_bound(%[[C1]] : index) stride(%[[C3]]#2 : index) start_idx(%[[C4]] : index) {stride_in_bytes = true}
-!DEVICE: %[[ARGADDR:.*]] = fir.box_addr %[[ARG1]] : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
-!DEVICE: %[[MAP:.*]] = omp.map_info var_ptr(%[[ARGADDR]] : !fir.ref<!fir.array<?xi32>>)   map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref<!fir.array<?xi32>> {name = "arr_read_write(2:5)"}
-!DEVICE: omp.target   map_entries(%[[MAP]] : !fir.ref<!fir.array<?xi32>>) {
+contains
+!ALL-LABEL: func.func @_QMassumed_array_routinesPassumed_shape_array(
+!ALL-SAME: %[[ARG0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "arr_read_write"}) {
+!ALL: %[[ALLOCA:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QMassumed_array_routinesFassumed_shape_arrayEi"}
+!ALL: %[[C0:.*]] = arith.constant 1 : index
+!ALL: %[[C1:.*]] = arith.constant 0 : index
+!ALL: %[[C2:.*]]:3 = fir.box_dims %arg0, %[[C1]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+!ALL: %[[C3:.*]] = arith.constant 1 : index
+!ALL: %[[C4:.*]] = arith.constant 4 : index
+!ALL: %[[BOUNDS:.*]] = omp.bounds   lower_bound(%[[C3]] : index) upper_bound(%[[C4]] : index) stride(%[[C2]]#2 : index) start_idx(%[[C0]] : index) {stride_in_bytes = true}
+!ALL: %[[ADDROF:.*]] = fir.box_addr %arg0 : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
+!ALL: %[[MAP:.*]] = omp.map_info var_ptr(%[[ADDROF]] : !fir.ref<!fir.array<?xi32>>)   map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref<!fir.array<?xi32>> {name = "arr_read_write(2:5)"}
+!ALL: %[[MAP2:.*]] = omp.map_info var_ptr(%[[ALLOCA]] : !fir.ref<i32>)   map_clauses(literal, implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref<i32> {name = "i"}
+!ALL: omp.target map_entries(%[[MAP]] -> %{{.*}}, %[[MAP2]] -> %{{.*}} : !fir.ref<!fir.array<?xi32>>, !fir.ref<i32>) {
 
-!HOST-LABEL: func.func @_QMassumed_array_routinesPassumed_shape_array(
-!HOST-SAME: %[[ARG0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "arr_read_write"}) {
-!HOST: %[[ALLOCA:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QMassumed_array_routinesFassumed_shape_arrayEi"}
-!HOST: %[[C0:.*]] = arith.constant 1 : index
-!HOST: %[[C1:.*]] = arith.constant 0 : index
-!HOST: %[[C2:.*]]:3 = fir.box_dims %arg0, %[[C1]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-!HOST: %[[C3:.*]] = arith.constant 1 : index
-!HOST: %[[C4:.*]] = arith.constant 4 : index
-!HOST: %[[BOUNDS:.*]] = omp.bounds   lower_bound(%[[C3]] : index) upper_bound(%[[C4]] : index) stride(%[[C2]]#2 : index) start_idx(%[[C0]] : index) {stride_in_bytes = true}
-!HOST: %[[ADDROF:.*]] = fir.box_addr %arg0 : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
-!HOST: %[[MAP:.*]] = omp.map_info var_ptr(%[[ADDROF]] : !fir.ref<!fir.array<?xi32>>)   map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref<!fir.array<?xi32>> {name = "arr_read_write(2:5)"}
-!HOST: omp.target   map_entries(%[[MAP]] : !fir.ref<!fir.array<?xi32>>) {
     subroutine assumed_shape_array(arr_read_write)
             integer, intent(inout) :: arr_read_write(:)
 
@@ -81,25 +55,17 @@ subroutine assumed_shape_array(arr_read_write)
         !$omp end target
         end subroutine assumed_shape_array
 
-!DEVICE-LABEL:   func.func @_QMassumed_array_routinesPassumed_size_array_omp_outline_0(
-!DEVICE-SAME:    %[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<!fir.array<?xi32>>) attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>, omp.outline_parent_name = "_QMassumed_array_routinesPassumed_size_array"} {
-!DEVICE: %[[C0:.*]] = arith.constant 1 : index
-!DEVICE: %[[C1:.*]] = arith.constant 4 : index
-!DEVICE: %[[C2:.*]] = arith.constant 1 : index
-!DEVICE: %[[C3:.*]] = arith.constant 1 : index
-!DEVICE: %[[BOUNDS:.*]] = omp.bounds   lower_bound(%[[C0]] : index) upper_bound(%[[C1]] : index) stride(%[[C3]] : index) start_idx(%[[C3]] : index)
-!DEVICE: %[[MAP:.*]] = omp.map_info var_ptr(%[[ARG1]] : !fir.ref<!fir.array<?xi32>>)   map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref<!fir.array<?xi32>> {name = "arr_read_write(2:5)"}
-!DEVICE: omp.target   map_entries(%[[MAP]] : !fir.ref<!fir.array<?xi32>>) {
+!ALL-LABEL:   func.func @_QMassumed_array_routinesPassumed_size_array(
+!ALL-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<?xi32>> {fir.bindc_name = "arr_read_write"}) {
+!ALL: %[[ALLOCA:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QMassumed_array_routinesFassumed_size_arrayEi"}
+!ALL: %[[C0:.*]] = arith.constant 1 : index
+!ALL: %[[C1:.*]] = arith.constant 1 : index
+!ALL: %[[C2:.*]] = arith.constant 4 : index
+!ALL: %[[BOUNDS:.*]]  = omp.bounds   lower_bound(%[[C1]] : index) upper_bound(%[[C2]] : index) stride(%[[C0]] : index) start_idx(%[[C0]] : index)
+!ALL: %[[MAP:.*]] = omp.map_info var_ptr(%[[ARG0]] : !fir.ref<!fir.array<?xi32>>)   map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref<!fir.array<?xi32>> {name = "arr_read_write(2:5)"}
+!ALL: %[[MAP2:.*]] = omp.map_info var_ptr(%[[ALLOCA]] : !fir.ref<i32>)   map_clauses(literal, implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref<i32> {name = "i"}
+!ALL: omp.target map_entries(%[[MAP]] -> %{{.*}}, %[[MAP2]] -> %{{.*}}, %{{.*}} -> %{{.*}} : !fir.ref<!fir.array<?xi32>>, !fir.ref<i32>, index) {
 
-!HOST-LABEL: func.func @_QMassumed_array_routinesPassumed_size_array(
-!HOST-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<?xi32>> {fir.bindc_name = "arr_read_write"}) {
-!HOST: %[[ALLOCA:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QMassumed_array_routinesFassumed_size_arrayEi"}
-!HOST: %[[C0:.*]] = arith.constant 1 : index
-!HOST: %[[C1:.*]] = arith.constant 1 : index
-!HOST: %[[C2:.*]] = arith.constant 4 : index
-!HOST: %[[BOUNDS:.*]]  = omp.bounds   lower_bound(%[[C1]] : index) upper_bound(%[[C2]] : index) stride(%[[C0]] : index) start_idx(%[[C0]] : index)
-!HOST: %[[MAP:.*]] = omp.map_info var_ptr(%[[ARG0]] : !fir.ref<!fir.array<?xi32>>)   map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref<!fir.array<?xi32>> {name = "arr_read_write(2:5)"}
-!HOST: omp.target   map_entries(%[[MAP]] : !fir.ref<!fir.array<?xi32>>) {
         subroutine assumed_size_array(arr_read_write)
             integer, intent(inout) :: arr_read_write(*)
 
@@ -111,6 +77,7 @@ subroutine assumed_size_array(arr_read_write)
         end subroutine assumed_size_array
     end module assumed_array_routines
 
+!DEVICE-NOT:func.func @_QPcall_assumed_shape_and_size_array() {
 
 !HOST-LABEL:func.func @_QPcall_assumed_shape_and_size_array() {
 !HOST:%{{.*}} = arith.constant 20 : index
diff --git a/flang/test/Lower/OpenMP/FIR/location.f90 b/flang/test/Lower/OpenMP/FIR/location.f90
index 0e36e09b19e1942..648377837670329 100644
--- a/flang/test/Lower/OpenMP/FIR/location.f90
+++ b/flang/test/Lower/OpenMP/FIR/location.f90
@@ -17,7 +17,7 @@ subroutine sub_parallel()
 !CHECK-LABEL: sub_target
 subroutine sub_target()
   print *, x
-!CHECK: omp.target  {
+!CHECK: omp.target {{.*}} {
   !$omp target
     print *, x
 !CHECK:   omp.terminator loc(#[[TAR_LOC:.*]])
diff --git a/flang/test/Lower/OpenMP/FIR/target.f90 b/flang/test/Lower/OpenMP/FIR/target.f90
index 9b1fb5c15ac1d2d..90a5488d85c718f 100644
--- a/flang/test/Lower/OpenMP/FIR/target.f90
+++ b/flang/test/Lower/OpenMP/FIR/target.f90
@@ -189,13 +189,14 @@ subroutine omp_target
    integer :: a(1024)
    !CHECK: %[[BOUNDS:.*]] = omp.bounds   lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}})
    !CHECK: %[[MAP:.*]] = omp.map_info var_ptr(%[[VAL_0]] : !fir.ref<!fir.array<1024xi32>>)   map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref<!fir.array<1024xi32>> {name = "a"}
-   !CHECK: omp.target   map_entries(%[[MAP]] : !fir.ref<!fir.array<1024xi32>>) {
+   !CHECK: omp.target   map_entries(%[[MAP]] -> %[[ARG_0:.*]], %{{.*}} -> %{{.*}} : !fir.ref<!fir.array<1024xi32>>, index) {
+   !CHECK: ^bb0(%[[ARG_0]]: !fir.ref<!fir.array<1024xi32>>, %{{.*}}: index):
    !$omp target map(tofrom: a)
       !CHECK: %[[VAL_1:.*]] = arith.constant 10 : i32
       !CHECK: %[[VAL_2:.*]] = arith.constant 1 : i64
       !CHECK: %[[VAL_3:.*]] = arith.constant 1 : i64
       !CHECK: %[[VAL_4:.*]] = arith.subi %[[VAL_2]], %[[VAL_3]] : i64
-      !CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_4]] : (!fir.ref<!fir.array<1024xi32>>, i64) -> !fir.ref<i32>
+      !CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[ARG_0]], %[[VAL_4]] : (!fir.ref<!fir.array<1024xi32>>, i64) -> !fir.ref<i32>
       !CHECK: fir.store %[[VAL_1]] to %[[VAL_5]] : !fir.ref<i32>
       a(1) = 10
    !CHECK: omp.terminator
@@ -203,6 +204,72 @@ subroutine omp_target
    !CHECK: }
 end subroutine omp_target
 
+!===============================================================================
+! Target implicit capture
+!===============================================================================
+
+!CHECK-LABEL: func.func @_QPomp_target_implicit() {
+subroutine omp_target_implicit
+   !CHECK: %[[VAL_0:.*]] = fir.alloca !fir.array<1024xi32> {bindc_name = "a", uniq_name = "_QFomp_target_implicitEa"}
+   integer :: a(1024)
+   !CHECK: %[[MAP:.*]] = omp.map_info var_ptr(%[[VAL_0]] : !fir.ref<!fir.array<1024xi32>>)   map_clauses(literal, implicit, exit_release_or_enter_alloc) capture(ByCopy) bounds(%{{.*}}) -> !fir.ref<!fir.array<1024xi32>> {name = "a"}
+   !CHECK: omp.target   map_entries(%[[MAP]] -> %[[ARG_0:.*]], %{{.*}} -> %{{.*}} : !fir.ref<!fir.array<1024xi32>>, index) {
+   !CHECK: ^bb0(%[[ARG_0]]: !fir.ref<!fir.array<1024xi32>>, %{{.*}}: index):
+   !$omp target
+      !CHECK: %[[VAL_5:.*]] = fir.coordinate_of %[[ARG_0]], %{{.*}} : (!fir.ref<!fir.array<1024xi32>>, i64) -> !fir.ref<i32>
+      a(1) = 10
+   !CHECK: omp.terminator
+   !$omp end target
+   !CHECK: }
+end subroutine omp_target_implicit
+
+!===============================================================================
+! Target implicit capture nested
+!===============================================================================
+
+!CHECK-LABEL: func.func @_QPomp_target_implicit_nested() {
+subroutine omp_target_implicit_nested
+   integer::a, b
+   !CHECK: omp.target   map_entries(%{{.*}} -> %[[ARG0:.*]], %{{.*}} -> %[[ARG1:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
+   !CHECK: ^bb0(%[[ARG0]]: !fir.ref<i32>, %[[ARG1]]: !fir.ref<i32>):
+   !$omp target
+      !CHECK: fir.store %{{.*}} to %[[ARG0]] : !fir.ref<i32>
+      a = 10
+      !$omp parallel
+         !CHECK: fir.store %{{.*}} to %[[ARG1]] : !fir.ref<i32>
+         b = 20
+         !CHECK: omp.terminator
+      !$omp end parallel
+   !CHECK: omp.terminator
+   !$omp end target
+   !CHECK: }
+end subroutine omp_target_implicit_nested
+
+!===============================================================================
+! Target implicit capture with bounds
+!===============================================================================
+
+!CHECK-LABEL: func.func @_QPomp_target_implicit_bounds(%{{.*}}: !fir.ref<i32> {fir.bindc_name = "n"}) {
+subroutine omp_target_implicit_bounds(n)
+   !CHECK: %[[VAL_1:.*]] = arith.select %{{.*}}, %{{.*}}, %{{.*}} : index
+   !CHECK: %[[VAL_2:.*]] = arith.select %{{.*}}, %{{.*}}, %{{.*}} : index
+   !CHECK: %[[VAL_3:.*]] = fir.alloca !fir.array<?x1024xi32>, %[[VAL_1]] {bindc_name = "a", uniq_name = "_QFomp_target_implicit_boundsEa"}
+   integer :: n
+   integer :: a(n, 1024)
+   !CHECK: %[[VAL_4:.*]] = omp.map_info var_ptr(%[[VAL_3]] : !fir.ref<!fir.array<?x1024xi32>>)   map_clauses(literal, implicit, exit_release_or_enter_alloc) capture(ByCopy) bounds(%{{.*}}) -> !fir.ref<!fir.array<?x1024xi32>> {name = "a"}
+   !CHECK: %[[VAL_5:.*]] = omp.map_info val(%[[VAL_1]] : index)   map_clauses(literal, implicit, exit_release_or_enter_alloc) capture(ByCopy) -> index {name = ""}
+   !CHECK: %[[VAL_6:.*]] = omp.map_info val(%[[VAL_2]] : index)   map_clauses(literal, implicit, exit_release_or_enter_alloc) capture(ByCopy) -> index {name = ""}
+   !CHECK: omp.target   map_entries(%[[VAL_4]] -> %[[ARG_1:.*]], %[[VAL_5]] -> %[[ARG_2:.*]], %[[VAL_6]] -> %[[ARG_3:.*]] : !fir.ref<!fir.array<?x1024xi32>>, index, index) {
+   !CHECK: ^bb0(%[[ARG_1]]: !fir.ref<!fir.array<?x1024xi32>>, %[[ARG_2]]: index, %[[ARG_3]]: index):
+   !$omp target
+      !CHECK: %{{.*}} = fir.convert %[[ARG_1]] : (!fir.ref<!fir.array<?x1024xi32>>) -> !fir.ref<!fir.array<?xi32>>
+      !CHECK: %{{.*}} = arith.muli %{{.*}}, %[[ARG_2]] : index
+      a(11,22) = 33
+      !CHECK: omp.terminator
+   !$omp end target
+!CHECK: }
+end subroutine omp_target_implicit_bounds
+
 !===============================================================================
 ! Target `thread_limit` clause
 !===============================================================================
@@ -212,7 +279,8 @@ subroutine omp_target_thread_limit
    integer :: a
    !CHECK: %[[VAL_1:.*]] = arith.constant 64 : i32
    !CHECK: %[[MAP:.*]] = omp.map_info var_ptr({{.*}})   map_clauses(tofrom) capture(ByRef) -> !fir.ref<i32> {name = "a"}
-   !CHECK: omp.target   thread_limit(%[[VAL_1]] : i32) map_entries(%[[MAP]] : !fir.ref<i32>) {
+   !CHECK: omp.target   thread_limit(%[[VAL_1]] : i32) map_entries(%[[MAP]] -> %[[ARG_0:.*]] : !fir.ref<i32>) {
+   !CHECK: ^bb0(%[[ARG_0]]: !fir.ref<i32>):
    !$omp target map(tofrom: a) thread_limit(64)
       a = 10
    !CHECK: omp.terminator
@@ -274,23 +342,25 @@ subroutine omp_target_parallel_do
    !CHECK: %[[C0:.*]] = arith.constant 0 : index
    !CHECK: %[[SUB:.*]] = arith.subi %[[C1024]], %[[C1]] : index
    !CHECK: %[[BOUNDS:.*]] = omp.bounds   lower_bound(%[[C0]] : index) upper_bound(%[[SUB]] : index) extent(%[[C1024]] : index) stride(%[[C1]] : index) start_idx(%[[C1]] : index)
-   !CHECK: %[[MAP:.*]] = omp.map_info var_ptr(%[[VAL_0]] : !fir.ref<!fir.array<1024xi32>>)   map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref<!fir.array<1024xi32>> {name = "a"}
-   !CHECK: omp.target   map_entries(%[[MAP]] : !fir.ref<!fir.array<1024xi32>>) {
+   !CHECK: %[[MAP1:.*]] = omp.map_info var_ptr(%[[VAL_0]] : !fir.ref<!fir.array<1024xi32>>)   map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref<!fir.array<1024xi32>> {name = "a"}
+   !CHECK: %[[MAP2:.*]] = omp.map_info var_ptr(%[[VAL_1]] : !fir.ref<i32>)   map_clauses(literal, implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref<i32> {name = "i"}
+   !CHECK: omp.target map_entries(%[[MAP1]] -> %[[VAL_2:.*]], %[[MAP2]] -> %[[VAL_3:.*]], %{{.*}} -> %{{.*}} : !fir.ref<!fir.array<1024xi32>>, !fir.ref<i32>, index) {
+   !CHECK: ^bb0(%[[VAL_2]]: !fir.ref<!fir.array<1024xi32>>, %[[VAL_3]]: !fir.ref<i32>, %{{.*}}: index):
       !CHECK-NEXT: omp.parallel
       !$omp target parallel do map(tofrom: a)
-         !CHECK: %[[VAL_2:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
-         !CHECK: %[[VAL_3:.*]] = arith.constant 1 : i32
-         !CHECK: %[[VAL_4:.*]] = arith.constant 1024 : i32
+         !CHECK: %[[VAL_4:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
          !CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32
-         !CHECK: omp.wsloop   for  (%[[VAL_6:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) {
-         !CHECK: fir.store %[[VAL_6]] to %[[VAL_2]] : !fir.ref<i32>
-         !CHECK: %[[VAL_7:.*]] = arith.constant 10 : i32
-         !CHECK: %[[VAL_8:.*]] = fir.load %[[VAL_2]] : !fir.ref<i32>
-         !CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i32) -> i64
-         !CHECK: %[[VAL_10:.*]] = arith.constant 1 : i64
-         !CHECK: %[[VAL_11:.*]] = arith.subi %[[VAL_9]], %[[VAL_10]] : i64
-         !CHECK: %[[VAL_12:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_11]] : (!fir.ref<!fir.array<1024xi32>>, i64) -> !fir.ref<i32>
-         !CHECK: fir.store %[[VAL_7]] to %[[VAL_12]] : !fir.ref<i32>
+         !CHECK: %[[VAL_6:.*]] = arith.constant 1024 : i32
+         !CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32
+         !CHECK: omp.wsloop   for  (%[[VAL_8:.*]]) : i32 = (%[[VAL_5]]) to (%[[VAL_6]]) inclusive step (%[[VAL_7]]) {
+         !CHECK: fir.store %[[VAL_8]] to %[[VAL_4]] : !fir.ref<i32>
+         !CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32
+         !CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_4]] : !fir.ref<i32>
+         !CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (i32) -> i64
+         !CHECK: %[[VAL_12:.*]] = arith.constant 1 : i64
+         !CHECK: %[[VAL_13:.*]] = arith.subi %[[VAL_11]], %[[VAL_12]] : i64
+         !CHECK: %[[VAL_14:.*]] = fir.coordinate_of %[[VAL_2]], %[[VAL_13]] : (!fir.ref<!fir.array<1024xi32>>, i64) -> !fir.ref<i32>
+         !CHECK: fir.store %[[VAL_9]] to %[[VAL_14]] : !fir.ref<i32>
          do i = 1, 1024
             a(i) = 10
          end do
@@ -301,4 +371,4 @@ subroutine omp_target_parallel_do
    !CHECK: omp.terminator
    !CHECK: }
    !$omp end target parallel do
-end subroutine omp_target_parallel_do
+ end subroutine omp_target_parallel_do
diff --git a/flang/test/Lower/OpenMP/array-bounds.f90 b/flang/test/Lower/OpenMP/array-bounds.f90
index 831c7021d6b2ed9..68cc8753d642f89 100644
--- a/flang/test/Lower/OpenMP/array-bounds.f90
+++ b/flang/test/Lower/OpenMP/array-bounds.f90
@@ -22,7 +22,7 @@
 !HOST:  %[[C6:.*]] = arith.constant 4 : index
 !HOST:  %[[BOUNDS1:.*]] = omp.bounds   lower_bound(%[[C5]] : index) upper_bound(%[[C6]] : index) stride(%[[C4]] : index) start_idx(%[[C4]] : index)
 !HOST:  %[[MAP1:.*]] = omp.map_info var_ptr(%[[WRITE_DECL]]#1 : !fir.ref<!fir.array<10xi32>>)   map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS1]]) -> !fir.ref<!fir.array<10xi32>> {name = "sp_write(2:5)"}
-!HOST:  omp.target   map_entries(%[[MAP0]], %[[MAP1]] : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>) {
+!HOST:  omp.target map_entries(%[[MAP0]] -> %{{.*}}, %[[MAP1]] -> %{{.*}}, {{.*}} -> {{.*}}, {{.*}} -> {{.*}}, {{.*}} -> {{.*}} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>, !fir.ref<i32>, index, index) {
 
 subroutine read_write_section()
     integer :: sp_read(10) = (/1,2,3,4,5,6,7,8,9,10/)
@@ -50,7 +50,7 @@ module assumed_array_routines
 !HOST: %[[BOUNDS:.*]] = omp.bounds   lower_bound(%[[C3]] : index) upper_bound(%[[C4]] : index) stride(%[[C2]]#2 : index) start_idx(%[[C0]] : index) {stride_in_bytes = true}
 !HOST: %[[ADDROF:.*]] = fir.box_addr %[[ARG0_DECL]]#1 : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
 !HOST: %[[MAP:.*]] = omp.map_info var_ptr(%[[ADDROF]] : !fir.ref<!fir.array<?xi32>>)   map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref<!fir.array<?xi32>> {name = "arr_read_write(2:5)"}
-!HOST: omp.target   map_entries(%[[MAP]] : !fir.ref<!fir.array<?xi32>>) {
+!HOST: omp.target   map_entries(%[[MAP]] -> %{{.*}}, {{.*}} -> {{.*}} : !fir.ref<!fir.array<?xi32>>, !fir.ref<i32>) {
     subroutine assumed_shape_array(arr_read_write)
             integer, intent(inout) :: arr_read_write(:)
 
@@ -73,7 +73,7 @@ end subroutine assumed_shape_array
 !HOST: %[[C2:.*]] = arith.constant 4 : index
 !HOST: %[[BOUNDS:.*]]  = omp.bounds   lower_bound(%[[C1]] : index) upper_bound(%[[C2]] : index) stride(%[[C0]] : index) start_idx(%[[C0]] : index)
 !HOST: %[[MAP:.*]] = omp.map_info var_ptr(%[[ARG0_DECL]]#1 : !fir.ref<!fir.array<?xi32>>)   map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref<!fir.array<?xi32>> {name = "arr_read_write(2:5)"}
-!HOST: omp.target   map_entries(%[[MAP]] : !fir.ref<!fir.array<?xi32>>) {
+!HOST: omp.target map_entries(%[[MAP]] -> %{{.*}}, {{.*}} -> {{.*}}, {{.*}} -> {{.*}} : !fir.ref<!fir.array<?xi32>>, !fir.ref<i32>, index) {
         subroutine assumed_size_array(arr_read_write)
             integer, intent(inout) :: arr_read_write(*)
 
diff --git a/flang/test/Lower/OpenMP/location.f90 b/flang/test/Lower/OpenMP/location.f90
index c87bf038e967215..1e01a4828dd9e1b 100644
--- a/flang/test/Lower/OpenMP/location.f90
+++ b/flang/test/Lower/OpenMP/location.f90
@@ -17,7 +17,7 @@ subroutine sub_parallel()
 !CHECK-LABEL: sub_target
 subroutine sub_target()
   print *, x
-!CHECK: omp.target  {
+!CHECK: omp.target {{.*}} {
   !$omp target
     print *, x
 !CHECK:   omp.terminator loc(#[[TAR_LOC:.*]])
diff --git a/flang/test/Lower/OpenMP/map-types-and-sizes.f90 b/flang/test/Lower/OpenMP/map-types-and-sizes.f90
new file mode 100644
index 000000000000000..0eab6d87f1619bd
--- /dev/null
+++ b/flang/test/Lower/OpenMP/map-types-and-sizes.f90
@@ -0,0 +1,57 @@
+!RUN: %flang_fc1 -emit-llvm -fopenmp %s -o - | FileCheck %s
+
+!===============================================================================
+! Check MapTypes for target implicit captures
+!===============================================================================
+
+!CHECK: @.offload_sizes = private unnamed_addr constant [1 x i64] [i64 4]
+!CHECK: @.offload_maptypes = private unnamed_addr constant [1 x i64] [i64 800]
+!CHECK-LABEL: define void @maptype_scalar_
+subroutine mapType_scalar
+  integer :: a
+  !$omp target
+     a = 10
+  !$omp end target
+end subroutine mapType_scalar
+
+!CHECK: @.offload_sizes = private unnamed_addr constant [1 x i64] [i64 4096]
+!CHECK: @.offload_maptypes = private unnamed_addr constant [1 x i64] [i64 547]
+!CHECK-LABEL: define void @maptype_array_
+subroutine mapType_array
+  integer :: a(1024)
+  !$omp target
+     a(10) = 20
+  !$omp end target
+end subroutine mapType_array
+
+!CHECK: @.offload_sizes = private unnamed_addr constant [1 x i64] [i64 0]
+!CHECK: @.offload_maptypes = private unnamed_addr constant [1 x i64] [i64 547]
+!CHECK-LABEL: define void @mapType_ptr
+subroutine mapType_ptr
+  integer, pointer :: a
+  !$omp target
+     a = 10
+  !$omp end target
+end subroutine mapType_ptr
+
+!CHECK: @.offload_sizes = private unnamed_addr constant [2 x i64] [i64 0, i64 4]
+!CHECK: @.offload_maptypes = private unnamed_addr constant [2 x i64] [i64 544, i64 800]
+!CHECK-LABEL: define void @mapType_c_ptr
+subroutine mapType_c_ptr
+  use iso_c_binding, only : c_ptr, c_loc
+  type(c_ptr) :: a
+  integer, target :: b
+  !$omp target
+     a = c_loc(b)
+  !$omp end target
+end subroutine mapType_c_ptr
+
+!CHECK: @.offload_sizes = private unnamed_addr constant [1 x i64] [i64 1]
+!CHECK: @.offload_maptypes = private unnamed_addr constant [1 x i64] [i64 800]
+!CHECK-LABEL: define void @mapType_char
+subroutine mapType_char
+  character :: a
+  !$omp target
+     a = 'b'
+  !$omp end target
+end subroutine mapType_char
\ No newline at end of file
diff --git a/flang/test/Lower/OpenMP/target-map_types.f90 b/flang/test/Lower/OpenMP/target-map_types.f90
new file mode 100644
index 000000000000000..0eab6d87f1619bd
--- /dev/null
+++ b/flang/test/Lower/OpenMP/target-map_types.f90
@@ -0,0 +1,57 @@
+!RUN: %flang_fc1 -emit-llvm -fopenmp %s -o - | FileCheck %s
+
+!===============================================================================
+! Check MapTypes for target implicit captures
+!===============================================================================
+
+!CHECK: @.offload_sizes = private unnamed_addr constant [1 x i64] [i64 4]
+!CHECK: @.offload_maptypes = private unnamed_addr constant [1 x i64] [i64 800]
+!CHECK-LABEL: define void @maptype_scalar_
+subroutine mapType_scalar
+  integer :: a
+  !$omp target
+     a = 10
+  !$omp end target
+end subroutine mapType_scalar
+
+!CHECK: @.offload_sizes = private unnamed_addr constant [1 x i64] [i64 4096]
+!CHECK: @.offload_maptypes = private unnamed_addr constant [1 x i64] [i64 547]
+!CHECK-LABEL: define void @maptype_array_
+subroutine mapType_array
+  integer :: a(1024)
+  !$omp target
+     a(10) = 20
+  !$omp end target
+end subroutine mapType_array
+
+!CHECK: @.offload_sizes = private unnamed_addr constant [1 x i64] [i64 0]
+!CHECK: @.offload_maptypes = private unnamed_addr constant [1 x i64] [i64 547]
+!CHECK-LABEL: define void @mapType_ptr
+subroutine mapType_ptr
+  integer, pointer :: a
+  !$omp target
+     a = 10
+  !$omp end target
+end subroutine mapType_ptr
+
+!CHECK: @.offload_sizes = private unnamed_addr constant [2 x i64] [i64 0, i64 4]
+!CHECK: @.offload_maptypes = private unnamed_addr constant [2 x i64] [i64 544, i64 800]
+!CHECK-LABEL: define void @mapType_c_ptr
+subroutine mapType_c_ptr
+  use iso_c_binding, only : c_ptr, c_loc
+  type(c_ptr) :: a
+  integer, target :: b
+  !$omp target
+     a = c_loc(b)
+  !$omp end target
+end subroutine mapType_c_ptr
+
+!CHECK: @.offload_sizes = private unnamed_addr constant [1 x i64] [i64 1]
+!CHECK: @.offload_maptypes = private unnamed_addr constant [1 x i64] [i64 800]
+!CHECK-LABEL: define void @mapType_char
+subroutine mapType_char
+  character :: a
+  !$omp target
+     a = 'b'
+  !$omp end target
+end subroutine mapType_char
\ No newline at end of file
diff --git a/flang/test/Lower/OpenMP/target.f90 b/flang/test/Lower/OpenMP/target.f90
new file mode 100644
index 000000000000000..4ba04717a59bdaf
--- /dev/null
+++ b/flang/test/Lower/OpenMP/target.f90
@@ -0,0 +1,113 @@
+!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
+
+!===============================================================================
+! Target with region
+!===============================================================================
+
+!CHECK-LABEL: func.func @_QPomp_target() {
+subroutine omp_target
+   !CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFomp_targetEa"} : (!fir.ref<!fir.array<1024xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<1024xi32>>, !fir.ref<!fir.array<1024xi32>>)
+   integer :: a(1024)
+   !CHECK: %[[BOUNDS:.*]] = omp.bounds   lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}})
+   !CHECK: %[[MAP:.*]] = omp.map_info var_ptr(%[[VAL_1]]#1 : !fir.ref<!fir.array<1024xi32>>)   map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref<!fir.array<1024xi32>> {name = "a"}
+   !CHECK: omp.target   map_entries(%[[MAP]] -> %[[ARG_0:.*]], %{{.*}} -> %[[ARG_1:.*]] : !fir.ref<!fir.array<1024xi32>>, index) {
+   !CHECK: ^bb0(%[[ARG_0]]: !fir.ref<!fir.array<1024xi32>>, %[[ARG_1]]: index):
+   !$omp target map(tofrom: a)
+   !CHECK: %[[VAL_2:.*]] = fir.shape %[[ARG_1]] : (index) -> !fir.shape<1>
+   !CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[ARG_0]](%[[VAL_2]]) {uniq_name = "_QFomp_targetEa"} : (!fir.ref<!fir.array<1024xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<1024xi32>>, !fir.ref<!fir.array<1024xi32>>)
+   !CHECK: %[[VAL_4:.*]] = arith.constant 10 : i32
+   !CHECK: %[[VAL_5:.*]] = arith.constant 1 : index
+   !CHECK: %[[VAL_6:.*]] = hlfir.designate %[[VAL_3]]#0 (%[[VAL_5]])  : (!fir.ref<!fir.array<1024xi32>>, index) -> !fir.ref<i32>
+   !CHECK: hlfir.assign %[[VAL_4]] to %[[VAL_6]] : i32, !fir.ref<i32>
+   a(1) = 10
+   !CHECK: omp.terminator
+   !$omp end target
+   !CHECK: }
+end subroutine omp_target
+
+!===============================================================================
+! Target implicit capture
+!===============================================================================
+
+!CHECK-LABEL: func.func @_QPomp_target_implicit() {
+subroutine omp_target_implicit
+   !CHECK: %[[VAL_0:.*]] = arith.constant 1024 : index
+   !CHECK: %[[VAL_1:.*]] = fir.alloca !fir.array<1024xi32> {bindc_name = "a", uniq_name = "_QFomp_target_implicitEa"}
+   !CHECK: %[[VAL_2:.*]] = fir.shape %[[VAL_0]] : (index) -> !fir.shape<1>
+   !CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_2]]) {uniq_name = "_QFomp_target_implicitEa"} : (!fir.ref<!fir.array<1024xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<1024xi32>>, !fir.ref<!fir.array<1024xi32>>)
+   integer :: a(1024)
+   !CHECK: %[[VAL_4:.*]] = omp.map_info var_ptr(%[[VAL_3]]#1 : !fir.ref<!fir.array<1024xi32>>)   map_clauses(literal, implicit, exit_release_or_enter_alloc) capture(ByCopy) bounds(%{{.*}}) -> !fir.ref<!fir.array<1024xi32>> {name = "a"}
+   !CHECK: %[[VAL_5:.*]] = omp.map_info val(%[[VAL_0]] : index)   map_clauses(literal, implicit, exit_release_or_enter_alloc) capture(ByCopy) -> index {name = ""}
+   !CHECK: omp.target   map_entries(%[[VAL_4]] -> %[[VAL_6:.*]], %[[VAL_5]] -> %[[VAL_7:.*]] : !fir.ref<!fir.array<1024xi32>>, index) {
+   !CHECK: ^bb0(%[[VAL_6]]: !fir.ref<!fir.array<1024xi32>>, %[[VAL_7]]: index):
+   !$omp target
+   !CHECK: %[[VAL_8:.*]] = fir.shape %[[VAL_7]] : (index) -> !fir.shape<1>
+   !CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_6]](%[[VAL_8]]) {uniq_name = "_QFomp_target_implicitEa"} : (!fir.ref<!fir.array<1024xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<1024xi32>>, !fir.ref<!fir.array<1024xi32>>)
+   !CHECK: %[[VAL_10:.*]] = arith.constant 10 : i32
+   !CHECK: %[[VAL_11:.*]] = arith.constant 1 : index
+   !CHECK: %[[VAL_12:.*]] = hlfir.designate %[[VAL_9]]#0 (%[[VAL_11]])  : (!fir.ref<!fir.array<1024xi32>>, index) -> !fir.ref<i32>
+   !CHECK: hlfir.assign %[[VAL_10]] to %[[VAL_12]] : i32, !fir.ref<i32>
+   a(1) = 10
+   !CHECK: omp.terminator
+   !$omp end target
+   !CHECK: }
+end subroutine omp_target_implicit
+
+!===============================================================================
+! Target implicit capture nested
+!===============================================================================
+
+!CHECK-LABEL: func.func @_QPomp_target_implicit_nested() {
+subroutine omp_target_implicit_nested
+   integer::a, b
+   !CHECK: omp.target   map_entries(%{{.*}} -> %[[ARG0:.*]], %{{.*}} -> %[[ARG1:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
+   !CHECK: ^bb0(%[[ARG0]]: !fir.ref<i32>, %[[ARG1]]: !fir.ref<i32>):
+   !$omp target
+   !CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFomp_target_implicit_nestedEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+   !CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[ARG1]] {uniq_name = "_QFomp_target_implicit_nestedEb"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+   !CHECK: %[[VAL_10:.*]] = arith.constant 10 : i32
+   !CHECK: hlfir.assign %[[VAL_10]] to %[[VAL_8]]#0 : i32, !fir.ref<i32>
+   a = 10
+   !CHECK: omp.parallel
+   !$omp parallel
+   !CHECK: %[[VAL_11:.*]] = arith.constant 20 : i32
+   !CHECK: hlfir.assign %[[VAL_11]] to %[[VAL_9]]#0 : i32, !fir.ref<i32>
+   b = 20
+   !CHECK: omp.terminator
+   !$omp end parallel
+   !CHECK: omp.terminator
+   !$omp end target
+   !CHECK: }
+end subroutine omp_target_implicit_nested
+
+!===============================================================================
+! Target implicit capture with bounds
+!===============================================================================
+
+!CHECK-LABEL: func.func @_QPomp_target_implicit_bounds(%{{.*}}: !fir.ref<i32> {fir.bindc_name = "n"}) {
+subroutine omp_target_implicit_bounds(n)
+   !CHECK: %[[VAL_1:.*]] = arith.select %{{.*}}, %{{.*}}, %{{.*}} : index
+   !CHECK: %[[VAL_2:.*]] = arith.select %{{.*}}, %{{.*}}, %{{.*}} : index
+   !CHECK: %[[VAL_3:.*]] = fir.alloca !fir.array<?x1024xi32>, %[[VAL_1]] {bindc_name = "a", uniq_name = "_QFomp_target_implicit_boundsEa"}
+   !CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_1]], %[[VAL_2]] : (index, index) -> !fir.shape<2>
+   !CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_3]](%[[VAL_4]]) {uniq_name = "_QFomp_target_implicit_boundsEa"} : (!fir.ref<!fir.array<?x1024xi32>>, !fir.shape<2>) -> (!fir.box<!fir.array<?x1024xi32>>, !fir.ref<!fir.array<?x1024xi32>>)
+   integer :: n
+   integer :: a(n, 1024)
+   !CHECK: %[[VAL_6:.*]] = omp.map_info var_ptr(%[[VAL_5]]#1 : !fir.ref<!fir.array<?x1024xi32>>)   map_clauses(literal, implicit, exit_release_or_enter_alloc) capture(ByCopy) bounds(%{{.*}}) -> !fir.ref<!fir.array<?x1024xi32>> {name = "a"}
+   !CHECK: %[[VAL_7:.*]] = omp.map_info val(%[[VAL_1]] : index)   map_clauses(literal, implicit, exit_release_or_enter_alloc) capture(ByCopy) -> index {name = ""}
+   !CHECK: %[[VAL_8:.*]] = omp.map_info val(%[[VAL_2]] : index)   map_clauses(literal, implicit, exit_release_or_enter_alloc) capture(ByCopy) -> index {name = ""}
+   !CHECK: omp.target   map_entries(%[[VAL_6]] -> %[[ARG_1:.*]], %[[VAL_7]] -> %[[ARG_2:.*]], %[[VAL_8]] -> %[[ARG_3:.*]] : !fir.ref<!fir.array<?x1024xi32>>, index, index) {
+   !CHECK: ^bb0(%[[ARG_1]]: !fir.ref<!fir.array<?x1024xi32>>, %[[ARG_2]]: index, %[[ARG_3]]: index):
+   !$omp target
+   !CHECK: %[[VAL_9:.*]] = fir.shape %[[ARG_2]], %[[ARG_3]] : (index, index) -> !fir.shape<2>
+   !CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[ARG_1]](%[[VAL_9]]) {uniq_name = "_QFomp_target_implicit_boundsEa"} : (!fir.ref<!fir.array<?x1024xi32>>, !fir.shape<2>) -> (!fir.box<!fir.array<?x1024xi32>>, !fir.ref<!fir.array<?x1024xi32>>)
+   !CHECK: %[[VAL_11:.*]] = arith.constant 33 : i32
+   !CHECK: %[[VAL_12:.*]] = arith.constant 11 : index
+   !CHECK: %[[VAL_13:.*]] = arith.constant 22 : index
+   !CHECK: %[[VAL_14:.*]] = hlfir.designate %[[VAL_10]]#0 (%[[VAL_12]], %[[VAL_13]])  : (!fir.box<!fir.array<?x1024xi32>>, index, index) -> !fir.ref<i32>
+   !CHECK: hlfir.assign %[[VAL_11]] to %[[VAL_14]] : i32, !fir.ref<i32>
+   a(11, 22) = 33
+   !CHECK: omp.terminator
+   !$omp end target
+!CHECK: }
+end subroutine omp_target_implicit_bounds
\ No newline at end of file
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 5da05476a683725..89c363b192f40c8 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -1144,14 +1144,14 @@ def DataBoundsOp : OpenMP_Op<"bounds",
 }
 
 def MapInfoOp : OpenMP_Op<"map_info", [AttrSizedOperandSegments]> {
-  let arguments = (ins OpenMP_PointerLikeType:$var_ptr,
+  let arguments = (ins Optional<AnyType>:$val,
+                       Optional<OpenMP_PointerLikeType>:$var_ptr,
                        Optional<OpenMP_PointerLikeType>:$var_ptr_ptr,
                        Variadic<DataBoundsType>:$bounds, /* rank-0 to rank-{n-1} */
                        OptionalAttr<UI64Attr>:$map_type,
                        OptionalAttr<VariableCaptureKindAttr>:$map_capture_type,
-                       DefaultValuedAttr<BoolAttr, "false">:$implicit,
                        OptionalAttr<StrAttr>:$name);
-  let results = (outs OpenMP_PointerLikeType:$omp_ptr);
+  let results = (outs AnyType:$omp_ptr);
 
   let description = [{
     The MapInfoOp captures information relating to individual OpenMP map clauses
@@ -1177,11 +1177,12 @@ def MapInfoOp : OpenMP_Op<"map_info", [AttrSizedOperandSegments]> {
     ```
     =>
     ```mlir
-    omp.map_info var_ptr(%index_ssa) map_type(to) map_capture_type(ByRef) implicit(false)
+    omp.map_info var_ptr(%index_ssa) map_type(to) map_capture_type(ByRef)
       name(index)
     ```
 
     Description of arguments:
+    - `val`: The value to copy.
     - `var_ptr`: The address of variable to copy.
     - `var_ptr_ptr`: Used when the variable copied is a member of a class, structure
       or derived type and refers to the originating struct.
@@ -1189,9 +1190,6 @@ def MapInfoOp : OpenMP_Op<"map_info", [AttrSizedOperandSegments]> {
       objects (e.g. derived types or classes), indicates the bounds to be copied
       of the variable. When it's an array slice it is in rank order where rank 0
       is the inner-most dimension.
-    - `implicit`: indicates where the map item has been specified explicitly in a
-      map clause or captured implicitly by being used in a target region with no
-      map or other data mapping construct.
     - 'map_clauses': OpenMP map type for this map capture, for example: from, to and
       always. It's a bitfield composed of the OpenMP runtime flags stored in
       OpenMPOffloadMappingFlags.
@@ -1201,9 +1199,10 @@ def MapInfoOp : OpenMP_Op<"map_info", [AttrSizedOperandSegments]> {
   }];
 
   let assemblyFormat = [{
-    `var_ptr` `(` $var_ptr `:` type($var_ptr) `)`
     oilist(
-        `var_ptr_ptr` `(` $var_ptr_ptr `:` type($var_ptr_ptr) `)`
+        `val` `(` $val `:` type($val) `)`
+      | `var_ptr` `(` $var_ptr `:` type($var_ptr) `)`
+      | `var_ptr_ptr` `(` $var_ptr_ptr `:` type($var_ptr_ptr) `)`
       | `map_clauses` `(` custom<MapClause>($map_type) `)`
       | `capture` `(` custom<CaptureType>($map_capture_type) `)`
       | `bounds` `(` $bounds `)`
@@ -1263,7 +1262,7 @@ def Target_DataOp: OpenMP_Op<"target_data", [AttrSizedOperandSegments]>{
                        Optional<AnyInteger>:$device,
                        Variadic<OpenMP_PointerLikeType>:$use_device_ptr,
                        Variadic<OpenMP_PointerLikeType>:$use_device_addr,
-                       Variadic<OpenMP_PointerLikeType>:$map_operands);
+                       Variadic<AnyType>:$map_operands);
 
   let regions = (region AnyRegion:$region);
 
@@ -1312,7 +1311,7 @@ def Target_EnterDataOp: OpenMP_Op<"target_enter_data",
   let arguments = (ins Optional<I1>:$if_expr,
                        Optional<AnyInteger>:$device,
                        UnitAttr:$nowait,
-                       Variadic<OpenMP_PointerLikeType>:$map_operands);
+                       Variadic<AnyType>:$map_operands);
 
   let assemblyFormat = [{
     oilist(`if` `(` $if_expr `:` type($if_expr) `)`
@@ -1358,7 +1357,7 @@ def Target_ExitDataOp: OpenMP_Op<"target_exit_data",
   let arguments = (ins Optional<I1>:$if_expr,
                        Optional<AnyInteger>:$device,
                        UnitAttr:$nowait,
-                       Variadic<OpenMP_PointerLikeType>:$map_operands);
+                       Variadic<AnyType>:$map_operands);
 
   let assemblyFormat = [{
     oilist(`if` `(` $if_expr `:` type($if_expr) `)`
@@ -1375,7 +1374,7 @@ def Target_ExitDataOp: OpenMP_Op<"target_exit_data",
 // 2.14.5 target construct
 //===----------------------------------------------------------------------===//
 
-def TargetOp : OpenMP_Op<"target",[OutlineableOpenMPOpInterface, AttrSizedOperandSegments]> {
+def TargetOp : OpenMP_Op<"target",[IsolatedFromAbove, OutlineableOpenMPOpInterface, AttrSizedOperandSegments]> {
   let summary = "target construct";
   let description = [{
     The target construct includes a region of code which is to be executed
@@ -1393,6 +1392,10 @@ def TargetOp : OpenMP_Op<"target",[OutlineableOpenMPOpInterface, AttrSizedOperan
     The optional $nowait elliminates the implicit barrier so the parent task can make progress
     even if the target task is not yet completed.
 
+    The optional $block_args stores the mapping between the block_arguments and their
+    corresponding value outside the region. It only stores those values which haven't already
+    been mapped in $map_operands.
+
     TODO:  is_device_ptr, depend, defaultmap, in_reduction
 
   }];
@@ -1401,7 +1404,7 @@ def TargetOp : OpenMP_Op<"target",[OutlineableOpenMPOpInterface, AttrSizedOperan
                        Optional<AnyInteger>:$device,
                        Optional<AnyInteger>:$thread_limit,
                        UnitAttr:$nowait,
-                       Variadic<OpenMP_PointerLikeType>:$map_operands);
+                       Variadic<AnyType>:$map_operands);
 
   let regions = (region AnyRegion:$region);
 
@@ -1410,7 +1413,7 @@ def TargetOp : OpenMP_Op<"target",[OutlineableOpenMPOpInterface, AttrSizedOperan
     | `device` `(` $device `:` type($device) `)`
     | `thread_limit` `(` $thread_limit `:` type($thread_limit) `)`
     | `nowait` $nowait
-    | `map_entries` `(` $map_operands `:` type($map_operands) `)`
+    | `map_entries` `(` custom<MapEntries>($map_operands, type($map_operands)) `)`
     ) $region attr-dict
   }];
 
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 2bf9355ed62676b..ff4bd9d632ed04f 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -693,6 +693,12 @@ static ParseResult parseMapClause(OpAsmParser &parser, IntegerAttr &mapType) {
     if (mapTypeMod == "always")
       mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
 
+    if (mapTypeMod == "literal")
+      mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
+
+    if (mapTypeMod == "implicit")
+      mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
+
     if (mapTypeMod == "close")
       mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
 
@@ -740,6 +746,12 @@ static void printMapClause(OpAsmPrinter &p, Operation *op,
   if (mapTypeToBitFlag(mapTypeBits,
                        llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS))
     mapTypeStrs.push_back("always");
+  if (mapTypeToBitFlag(mapTypeBits,
+                       llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL))
+    mapTypeStrs.push_back("literal");
+  if (mapTypeToBitFlag(mapTypeBits,
+                       llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
+    mapTypeStrs.push_back("implicit");
   if (mapTypeToBitFlag(mapTypeBits,
                        llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE))
     mapTypeStrs.push_back("close");
@@ -780,6 +792,64 @@ static void printMapClause(OpAsmPrinter &p, Operation *op,
   }
 }
 
+static ParseResult
+parseMapEntries(OpAsmParser &parser,
+                SmallVectorImpl<OpAsmParser::UnresolvedOperand> &mapOperands,
+                SmallVectorImpl<Type> &mapOperandTypes) {
+  OpAsmParser::UnresolvedOperand arg;
+  OpAsmParser::UnresolvedOperand blockArg;
+  Type argType;
+  auto parseEntries = [&]() -> ParseResult {
+    if (parser.parseOperand(arg) || parser.parseArrow() ||
+        parser.parseOperand(blockArg))
+      return failure();
+    mapOperands.push_back(arg);
+    return success();
+  };
+
+  auto parseTypes = [&]() -> ParseResult {
+    if (parser.parseType(argType))
+      return failure();
+    mapOperandTypes.push_back(argType);
+    return success();
+  };
+
+  if (parser.parseCommaSeparatedList(parseEntries))
+    return failure();
+
+  if (parser.parseColon())
+    return failure();
+
+  if (parser.parseCommaSeparatedList(parseTypes))
+    return failure();
+
+  return success();
+}
+
+static void printMapEntries(OpAsmPrinter &p, Operation *op,
+                            OperandRange mapOperands,
+                            TypeRange mapOperandTypes) {
+  auto &region = op->getRegion(0);
+  unsigned argIndex = 0;
+
+  for (const auto &mapOp : mapOperands) {
+    const auto &blockArg = region.front().getArgument(argIndex);
+    p << mapOp << " -> " << blockArg;
+    argIndex++;
+    if (argIndex < mapOperands.size())
+      p << ", ";
+  }
+  p << " : ";
+
+  argIndex = 0;
+  for (const auto &mapType : mapOperandTypes) {
+    p << mapType;
+    argIndex++;
+    if (argIndex < mapOperands.size())
+      p << ", ";
+  }
+}
+
 static void printCaptureType(OpAsmPrinter &p, Operation *op,
                              VariableCaptureKindAttr mapCaptureType) {
   std::string typeCapStr;
@@ -826,6 +896,12 @@ static LogicalResult verifyMapClause(Operation *op, OperandRange mapOperands) {
     if (auto MapInfoOp =
             mlir::dyn_cast<mlir::omp::MapInfoOp>(mapOp.getDefiningOp())) {
 
+      if (MapInfoOp.getVal() && MapInfoOp.getVarPtr())
+        emitError(op->getLoc(), "only one of val or var_ptr must be used");
+
+      if (!MapInfoOp.getVal() && !MapInfoOp.getVarPtr())
+        emitError(op->getLoc(), "missing val or var_ptr");
+
       if (!MapInfoOp.getMapType().has_value())
         emitError(op->getLoc(), "missing map type for map operand");
 

>From ab40ab42949aeb81947e72ace7b3e010b79caf8c Mon Sep 17 00:00:00 2001
From: Akash Banerjee <Akash.Banerjee at amd.com>
Date: Fri, 22 Sep 2023 18:13:46 +0100
Subject: [PATCH 2/3] [OpenMP][MLIR] Add "IsolatedFromAbove" trait to
 omp.target

This patch adds the MLIR translation changes required for add the IsolatedFromAbove and OutlineableOpenMPOpInterface traits to omp.target. It links the newly added block arguments to their corresponding llvm values.
---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 70 ++++++++++++++-----
 .../OpenMPToLLVM/convert-to-llvmir.mlir       | 20 ++++--
 mlir/test/Dialect/OpenMP/canonicalize.mlir    |  5 +-
 mlir/test/Dialect/OpenMP/invalid.mlir         |  4 +-
 mlir/test/Dialect/OpenMP/ops.mlir             | 21 ++++--
 .../omptarget-declare-target-llvm-device.mlir | 13 ++--
 .../LLVMIR/omptarget-region-device-llvm.mlir  |  9 +--
 .../omptarget-region-llvm-target-device.mlir  |  9 +--
 .../Target/LLVMIR/omptarget-region-llvm.mlir  |  9 +--
 .../omptarget-region-parallel-llvm.mlir       |  9 +--
 10 files changed, 112 insertions(+), 57 deletions(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index b60c25340b8d373..44868b788bd3ec5 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1685,18 +1685,21 @@ static void genMapInfos(llvm::IRBuilderBase &builder,
     if (!mapInfoOp.getType().isa<LLVM::LLVMPointerType>())
       return fail();
 
-    llvm::Value *mapOpValue =
-        moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
+    llvm::Value *mapOpValue = moduleTranslation.lookupValue(
+        mapInfoOp.getVarPtr() ? mapInfoOp.getVarPtr() : mapInfoOp.getVal());
 
-    llvm::Value *refPtr =
-        getRefPtrIfDeclareTarget(mapInfoOp.getVarPtr(), moduleTranslation);
+    llvm::Value *refPtr = getRefPtrIfDeclareTarget(
+        mapInfoOp.getVarPtr() ? mapInfoOp.getVarPtr() : mapInfoOp.getVal(),
+        moduleTranslation);
 
     combinedInfo.BasePointers.emplace_back(refPtr ? refPtr : mapOpValue);
     combinedInfo.Pointers.emplace_back(mapOpValue);
     combinedInfo.DevicePointers.emplace_back(
         llvm::OpenMPIRBuilder::DeviceInfoTy::None);
     combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
-        mapInfoOp.getVarPtr().getLoc(), *ompBuilder));
+        mapInfoOp.getVarPtr() ? mapInfoOp.getVarPtr().getLoc()
+                              : mapInfoOp.getVal().getLoc(),
+        *ompBuilder));
 
     auto mapFlag = llvm::omp::OpenMPOffloadMappingFlags(
         mapTypes[index].cast<IntegerAttr>().getUInt());
@@ -1710,8 +1713,9 @@ static void genMapInfos(llvm::IRBuilderBase &builder,
       mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
 
     combinedInfo.Types.emplace_back(mapFlag);
-    combinedInfo.Sizes.emplace_back(
-        builder.getInt64(getSizeInBytes(DL, mapInfoOp.getVarPtr().getType())));
+    combinedInfo.Sizes.emplace_back(builder.getInt64(getSizeInBytes(
+        DL, mapInfoOp.getVarPtr() ? mapInfoOp.getVarPtr().getType()
+                                  : mapInfoOp.getVal().getType())));
     index++;
   }
 
@@ -2007,10 +2011,11 @@ handleDeclareTargetMapVar(llvm::ArrayRef<Value> mapOperands,
   for (const mlir::Value &mapOp : mapOperands) {
     auto mapInfoOp =
         mlir::dyn_cast<mlir::omp::MapInfoOp>(mapOp.getDefiningOp());
-    llvm::Value *mapOpValue =
-        moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
-    if (auto *declareTarget = getRefPtrIfDeclareTarget(mapInfoOp.getVarPtr(),
-                                                       moduleTranslation)) {
+    llvm::Value *mapOpValue = moduleTranslation.lookupValue(
+        mapInfoOp.getVarPtr() ? mapInfoOp.getVarPtr() : mapInfoOp.getVal());
+    if (auto *declareTarget = getRefPtrIfDeclareTarget(
+            mapInfoOp.getVarPtr() ? mapInfoOp.getVarPtr() : mapInfoOp.getVal(),
+            moduleTranslation)) {
       // The user's iterator will get invalidated if we modify an element,
       // so we populate this vector of uses to alter each user on an individual
       // basis to emit its own load (rather than one load for all).
@@ -2021,7 +2026,9 @@ handleDeclareTargetMapVar(llvm::ArrayRef<Value> mapOperands,
       for (llvm::User *user : userVec) {
         if (auto *insn = dyn_cast<llvm::Instruction>(user)) {
           auto *load = builder.CreateLoad(
-              moduleTranslation.convertType(mapInfoOp.getVarPtr().getType()),
+              moduleTranslation.convertType(
+                  mapInfoOp.getVarPtr() ? mapInfoOp.getVarPtr().getType()
+                                        : mapInfoOp.getVal().getType()),
               declareTarget);
           load->moveBefore(insn);
           user->replaceUsesOfWith(mapOpValue, load);
@@ -2065,6 +2072,19 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
 
   auto targetOp = cast<omp::TargetOp>(opInst);
   auto &targetRegion = targetOp.getRegion();
+  DataLayout DL = DataLayout(opInst.getParentOfType<ModuleOp>());
+  SmallVector<Value> mapOperands = targetOp.getMapOperands();
+
+  // Remove mapOperands/blockArgs that have no use inside the region.
+  assert(mapOperands.size() == targetRegion.getNumArguments() &&
+         "Number of mapOperands must be same as block_arguments");
+  for (size_t i = 0; i < mapOperands.size(); i++) {
+    if (targetRegion.getArgument(i).use_empty()) {
+      targetRegion.eraseArgument(i);
+      mapOperands.erase(&mapOperands[i]);
+      i--;
+    }
+  }
 
   // This function filters out kernel data that will not show up as kernel
   // input arguments to the generated kernel function but will still need
@@ -2072,7 +2092,7 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
   // (declare target). It also prepares some data used for generating the
   // kernel and populating the associated OpenMP runtime data structures.
   auto getKernelArguments =
-      [&](const llvm::SetVector<Value> &operandSet,
+      [&](const llvm::SmallVectorImpl<Value> &operandSet,
           llvm::SmallVectorImpl<llvm::Value *> &llvmInputs) {
         for (Value operand : operandSet) {
           if (!getRefPtrIfDeclareTarget(operand, moduleTranslation))
@@ -2080,11 +2100,16 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
         }
       };
 
-  llvm::SetVector<Value> operandSet;
-  getUsedValuesDefinedAbove(targetRegion, operandSet);
+  llvm::SmallVector<Value> mapVals;
+  for (Value mapOp : mapOperands) {
+    auto mapInfoOp =
+        mlir::dyn_cast<mlir::omp::MapInfoOp>(mapOp.getDefiningOp());
+    mapVals.push_back(mapInfoOp.getVarPtr() ? mapInfoOp.getVarPtr()
+                                            : mapInfoOp.getVal());
+  }
 
   llvm::SmallVector<llvm::Value *> inputs;
-  getKernelArguments(operandSet, inputs);
+  getKernelArguments(mapVals, inputs);
 
   LogicalResult bodyGenStatus = success();
 
@@ -2092,6 +2117,16 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
   auto bodyCB = [&](InsertPointTy allocaIP,
                     InsertPointTy codeGenIP) -> InsertPointTy {
     builder.restoreIP(codeGenIP);
+    unsigned argIndex = 0;
+    for (auto &mapOp : mapOperands) {
+      auto mapInfoOp =
+          mlir::dyn_cast<mlir::omp::MapInfoOp>(mapOp.getDefiningOp());
+      llvm::Value *mapOpValue = moduleTranslation.lookupValue(
+          mapInfoOp.getVarPtr() ? mapInfoOp.getVarPtr() : mapInfoOp.getVal());
+      const auto &arg = targetRegion.front().getArgument(argIndex);
+      moduleTranslation.mapValue(arg, mapOpValue);
+      argIndex++;
+    }
     llvm::BasicBlock *exitBlock = convertOmpOpRegions(
         targetRegion, "omp.target", builder, moduleTranslation, bodyGenStatus);
     builder.SetInsertPoint(exitBlock);
@@ -2119,9 +2154,6 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
   llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
       findAllocaInsertPoint(builder, moduleTranslation);
 
-  DataLayout DL = DataLayout(opInst.getParentOfType<ModuleOp>());
-  SmallVector<Value> mapOperands = targetOp.getMapOperands();
-
   auto getMapTypes = [](mlir::OperandRange mapOperands,
                         mlir::MLIRContext *ctx) {
     SmallVector<mlir::Attribute> mapTypes;
diff --git a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir
index 881d738b413ef15..37a984f77a527a8 100644
--- a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir
+++ b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir
@@ -244,10 +244,12 @@ llvm.func @_QPomp_target_data_region(%a : !llvm.ptr<array<1024 x i32>>, %i : !ll
 // CHECK:                             %[[ARG_0:.*]]: !llvm.ptr<array<1024 x i32>>,
 // CHECK:                             %[[ARG_1:.*]]: !llvm.ptr<i32>) {
 // CHECK:           %[[VAL_0:.*]] = llvm.mlir.constant(64 : i32) : i32
-// CHECK:           %[[MAP:.*]] = omp.map_info var_ptr(%[[ARG_0]] : !llvm.ptr<array<1024 x i32>>)   map_clauses(tofrom) capture(ByRef) -> !llvm.ptr<array<1024 x i32>> {name = ""}
-// CHECK:           omp.target   thread_limit(%[[VAL_0]] : i32) map_entries(%[[MAP]] : !llvm.ptr<array<1024 x i32>>) {
+// CHECK:           %[[MAP1:.*]] = omp.map_info var_ptr(%[[ARG_0]] : !llvm.ptr<array<1024 x i32>>)   map_clauses(tofrom) capture(ByRef) -> !llvm.ptr<array<1024 x i32>> {name = ""}
+// CHECK:           %[[MAP2:.*]] = omp.map_info var_ptr(%[[ARG_1]] : !llvm.ptr<i32>)   map_clauses(literal, implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr<i32> {name = ""}
+// CHECK:           omp.target   thread_limit(%[[VAL_0]] : i32) map_entries(%[[MAP1]] -> %[[BB_ARG0:.*]], %[[MAP2]] -> %[[BB_ARG1:.*]] : !llvm.ptr<array<1024 x i32>>, !llvm.ptr<i32>) {
+// CHECK:           ^bb0(%[[BB_ARG0]]: !llvm.ptr<array<1024 x i32>>, %[[BB_ARG1]]: !llvm.ptr<i32>):
 // CHECK:             %[[VAL_1:.*]] = llvm.mlir.constant(10 : i32) : i32
-// CHECK:             llvm.store %[[VAL_1]], %[[ARG_1]] : !llvm.ptr<i32>
+// CHECK:             llvm.store %[[VAL_1]], %[[BB_ARG1]] : !llvm.ptr<i32>
 // CHECK:             omp.terminator
 // CHECK:           }
 // CHECK:           llvm.return
@@ -256,9 +258,11 @@ llvm.func @_QPomp_target_data_region(%a : !llvm.ptr<array<1024 x i32>>, %i : !ll
 llvm.func @_QPomp_target(%a : !llvm.ptr<array<1024 x i32>>, %i : !llvm.ptr<i32>) {
   %0 = llvm.mlir.constant(64 : i32) : i32
   %1 = omp.map_info var_ptr(%a : !llvm.ptr<array<1024 x i32>>)   map_clauses(tofrom) capture(ByRef) -> !llvm.ptr<array<1024 x i32>> {name = ""}
-  omp.target   thread_limit(%0 : i32) map_entries(%1 : !llvm.ptr<array<1024 x i32>>) {
+  %3 = omp.map_info var_ptr(%i : !llvm.ptr<i32>)   map_clauses(literal, implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr<i32> {name = ""}
+  omp.target   thread_limit(%0 : i32) map_entries(%1 -> %arg0, %3 -> %arg1 : !llvm.ptr<array<1024 x i32>>, !llvm.ptr<i32>) {
+    ^bb0(%arg0: !llvm.ptr<array<1024 x i32>>, %arg1: !llvm.ptr<i32>):
     %2 = llvm.mlir.constant(10 : i32) : i32
-    llvm.store %2, %i : !llvm.ptr<i32>
+    llvm.store %2, %arg1 : !llvm.ptr<i32>
     omp.terminator
   }
   llvm.return
@@ -451,7 +455,8 @@ llvm.func @sub_() {
 // CHECK: %[[C_14:.*]] = llvm.mlir.constant(1 : index) : i64
 // CHECK: %[[BOUNDS1:.*]] = omp.bounds   lower_bound(%[[C_12]] : i64) upper_bound(%[[C_11]] : i64) stride(%[[C_14]] : i64) start_idx(%[[C_14]] : i64)
 // CHECK: %[[MAP1:.*]] = omp.map_info var_ptr(%[[ARG_2]] : !llvm.ptr<array<10 x i32>>)   map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS1]]) -> !llvm.ptr<array<10 x i32>> {name = ""}
-// CHECK: omp.target   map_entries(%[[MAP0]], %[[MAP1]] : !llvm.ptr<array<10 x i32>>, !llvm.ptr<array<10 x i32>>) {
+// CHECK: omp.target   map_entries(%[[MAP0]] -> %[[BB_ARG0:.*]], %[[MAP1]]  -> %[[BB_ARG1:.*]] : !llvm.ptr<array<10 x i32>>, !llvm.ptr<array<10 x i32>>) {
+// CHECK: ^bb0(%[[BB_ARG0]]: !llvm.ptr<array<10 x i32>>, %[[BB_ARG1]]: !llvm.ptr<array<10 x i32>>):
 // CHECK:   omp.terminator
 // CHECK: }
 // CHECK: llvm.return
@@ -470,7 +475,8 @@ llvm.func @_QPtarget_map_with_bounds(%arg0: !llvm.ptr<i32>, %arg1: !llvm.ptr<arr
   %9 = llvm.mlir.constant(1 : index) : i64
   %10 = omp.bounds   lower_bound(%7 : i64) upper_bound(%6 : i64) stride(%9 : i64) start_idx(%9 : i64)
   %11 = omp.map_info var_ptr(%arg2 : !llvm.ptr<array<10 x i32>>)   map_clauses(tofrom) capture(ByRef) bounds(%10) -> !llvm.ptr<array<10 x i32>> {name = ""}
-  omp.target   map_entries(%5, %11 : !llvm.ptr<array<10 x i32>>, !llvm.ptr<array<10 x i32>>) {
+  omp.target   map_entries(%5 -> %arg3, %11 -> %arg4: !llvm.ptr<array<10 x i32>>, !llvm.ptr<array<10 x i32>>) {
+    ^bb0(%arg3: !llvm.ptr<array<10 x i32>>, %arg4: !llvm.ptr<array<10 x i32>>):
     omp.terminator
   }
   llvm.return
diff --git a/mlir/test/Dialect/OpenMP/canonicalize.mlir b/mlir/test/Dialect/OpenMP/canonicalize.mlir
index 68f5bacb1def178..4ecbb027ba47f24 100644
--- a/mlir/test/Dialect/OpenMP/canonicalize.mlir
+++ b/mlir/test/Dialect/OpenMP/canonicalize.mlir
@@ -131,8 +131,9 @@ func.func private @foo() -> ()
 
 func.func @constant_hoisting_target(%x : !llvm.ptr<i32>) {
   omp.target {
+    ^bb0(%arg0: !llvm.ptr<i32>):
     %c1 = arith.constant 10 : i32
-    llvm.store %c1, %x : i32, !llvm.ptr<i32>
+    llvm.store %c1, %arg0 : i32, !llvm.ptr<i32>
     omp.terminator
   }
   return
@@ -141,4 +142,4 @@ func.func @constant_hoisting_target(%x : !llvm.ptr<i32>) {
 // CHECK-LABEL: func.func @constant_hoisting_target
 // CHECK-NOT: arith.constant
 // CHECK: omp.target
-// CHECK-NEXT: arith.constant
+// CHECK: arith.constant
diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir
index c8025249e27004e..36528e18b236f37 100644
--- a/mlir/test/Dialect/OpenMP/invalid.mlir
+++ b/mlir/test/Dialect/OpenMP/invalid.mlir
@@ -1617,7 +1617,9 @@ func.func @omp_threadprivate() {
 func.func @omp_target(%map1: memref<?xi32>) {
   %mapv = omp.map_info var_ptr(%map1 : memref<?xi32>)   map_clauses(delete) capture(ByRef) -> memref<?xi32> {name = ""}
   // expected-error @below {{to, from, tofrom and alloc map types are permitted}}
-  omp.target map_entries(%mapv : memref<?xi32>){}
+  omp.target map_entries(%mapv -> %arg0: memref<?xi32>) {
+    ^bb0(%arg0: memref<?xi32>):
+  }
   return
 }
 
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index 27c31824c0506df..63ab1663ab5e968 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -492,16 +492,22 @@ func.func @omp_target(%if_cond : i1, %device : si32,  %num_threads : i32, %map1:
     // Test with optional map clause.
     // CHECK: %[[MAP_A:.*]] = omp.map_info var_ptr(%[[VAL_1:.*]] : memref<?xi32>)   map_clauses(tofrom) capture(ByRef) -> memref<?xi32> {name = ""}
     // CHECK: %[[MAP_B:.*]] = omp.map_info var_ptr(%[[VAL_2:.*]] : memref<?xi32>)   map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> memref<?xi32> {name = ""}
-    // CHECK: omp.target map_entries(%[[MAP_A]], %[[MAP_B]] : memref<?xi32>, memref<?xi32>) {
+    // CHECK: omp.target map_entries(%[[MAP_A]] -> {{.*}}, %[[MAP_B]] -> {{.*}} : memref<?xi32>, memref<?xi32>) {
     %mapv1 = omp.map_info var_ptr(%map1 : memref<?xi32>)   map_clauses(tofrom) capture(ByRef) -> memref<?xi32> {name = ""}
     %mapv2 = omp.map_info var_ptr(%map2 : memref<?xi32>)   map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> memref<?xi32> {name = ""}
-    omp.target map_entries(%mapv1, %mapv2 : memref<?xi32>, memref<?xi32>){}
+    omp.target map_entries(%mapv1 -> %arg0, %mapv2 -> %arg1 : memref<?xi32>, memref<?xi32>) {
+    ^bb0(%arg0: memref<?xi32>, %arg1: memref<?xi32>):
+      omp.terminator
+    }
     // CHECK: %[[MAP_C:.*]] = omp.map_info var_ptr(%[[VAL_1:.*]] : memref<?xi32>)   map_clauses(to) capture(ByRef) -> memref<?xi32> {name = ""}
     // CHECK: %[[MAP_D:.*]] = omp.map_info var_ptr(%[[VAL_2:.*]] : memref<?xi32>)   map_clauses(always, from) capture(ByRef) -> memref<?xi32> {name = ""}
-    // CHECK: omp.target map_entries(%[[MAP_C]], %[[MAP_D]] : memref<?xi32>, memref<?xi32>) {
+    // CHECK: omp.target map_entries(%[[MAP_C]] -> {{.*}}, %[[MAP_D]] -> {{.*}} : memref<?xi32>, memref<?xi32>) {
     %mapv3 = omp.map_info var_ptr(%map1 : memref<?xi32>)   map_clauses(to) capture(ByRef) -> memref<?xi32> {name = ""}
     %mapv4 = omp.map_info var_ptr(%map2 : memref<?xi32>)   map_clauses(always, from) capture(ByRef) -> memref<?xi32> {name = ""}
-    omp.target map_entries(%mapv3, %mapv4 : memref<?xi32>, memref<?xi32>) {}
+    omp.target map_entries(%mapv3 -> %arg0, %mapv4 -> %arg1 : memref<?xi32>, memref<?xi32>) {
+    ^bb0(%arg0: memref<?xi32>, %arg1: memref<?xi32>):
+      omp.terminator
+    }
     // CHECK: omp.barrier
     omp.barrier
 
@@ -2055,8 +2061,11 @@ func.func @omp_targets_with_map_bounds(%arg0: !llvm.ptr<array<10 x i32>>, %arg1:
     %10 = omp.bounds   lower_bound(%7 : i64) upper_bound(%6 : i64) stride(%8 : i64) start_idx(%9 : i64)
     %mapv2 = omp.map_info var_ptr(%arg1 : !llvm.ptr<array<10 x i32>>)   map_clauses(exit_release_or_enter_alloc) capture(ByCopy) bounds(%10) -> !llvm.ptr<array<10 x i32>> {name = ""}
 
-    // CHECK: omp.target map_entries(%[[MAP0]], %[[MAP1]] : !llvm.ptr<array<10 x i32>>, !llvm.ptr<array<10 x i32>>)
-    omp.target map_entries(%mapv1, %mapv2 : !llvm.ptr<array<10 x i32>>, !llvm.ptr<array<10 x i32>>){}
+    // CHECK: omp.target map_entries(%[[MAP0]] -> {{.*}}, %[[MAP1]] -> {{.*}} : !llvm.ptr<array<10 x i32>>, !llvm.ptr<array<10 x i32>>)
+    omp.target map_entries(%mapv1 -> %arg2, %mapv2 -> %arg3 : !llvm.ptr<array<10 x i32>>, !llvm.ptr<array<10 x i32>>) {
+    ^bb0(%arg2: !llvm.ptr<array<10 x i32>>, %arg3: !llvm.ptr<array<10 x i32>>):
+      omp.terminator
+    }
 
     // CHECK: omp.target_data map_entries(%[[MAP0]], %[[MAP1]] : !llvm.ptr<array<10 x i32>>, !llvm.ptr<array<10 x i32>>)
     omp.target_data map_entries(%mapv1, %mapv2 : !llvm.ptr<array<10 x i32>>, !llvm.ptr<array<10 x i32>>){}
diff --git a/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-device.mlir b/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-device.mlir
index f279772bd6c0969..f4c7a626398f5a9 100644
--- a/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-device.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-device.mlir
@@ -1,10 +1,10 @@
 // RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
 
 // This test the generation of additional load operations for declare target link variables
-// inside of target op regions when lowering to IR for device. Unfortunately as the host file is not 
+// inside of target op regions when lowering to IR for device. Unfortunately as the host file is not
 // passed as a module attribute, we miss out on the metadata and entryinfo.
 //
-// Unfortunately, only so much can be tested as the device side is dependent on a *.bc 
+// Unfortunately, only so much can be tested as the device side is dependent on a *.bc
 // file created by the host and appended as an attribute to the module.
 
 module attributes {omp.is_target_device = true} {
@@ -13,18 +13,19 @@ module attributes {omp.is_target_device = true} {
     %0 = llvm.mlir.constant(0 : i32) : i32
     llvm.return %0 : i32
   }
-                                                            
+
   llvm.func @_QQmain() attributes {} {
     %0 = llvm.mlir.addressof @_QMtest_0Esp : !llvm.ptr<i32>
-  
+
   // CHECK-DAG:   omp.target:                                       ; preds = %user_code.entry
   // CHECK-DAG: %1 = load ptr, ptr @_QMtest_0Esp_decl_tgt_ref_ptr, align 8
   // CHECK-DAG: store i32 1, ptr %1, align 4
   // CHECK-DAG: br label %omp.region.cont
     %map = omp.map_info var_ptr(%0 : !llvm.ptr<i32>)   map_clauses(tofrom) capture(ByRef) -> !llvm.ptr<i32> {name = ""}
-    omp.target   map_entries(%map : !llvm.ptr<i32>) {
+    omp.target   map_entries(%map -> %arg0 : !llvm.ptr<i32>) {
+      ^bb0(%arg0: !llvm.ptr<i32>):
       %1 = llvm.mlir.constant(1 : i32) : i32
-      llvm.store %1, %0 : !llvm.ptr<i32>
+      llvm.store %1, %arg0 : !llvm.ptr<i32>
       omp.terminator
     }
 
diff --git a/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir
index 99f1a3b072ad8fe..d02e554ccd4d525 100644
--- a/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir
@@ -15,11 +15,12 @@ module attributes {omp.is_target_device = true} {
     %map1 = omp.map_info var_ptr(%3 : !llvm.ptr<i32>)   map_clauses(tofrom) capture(ByRef) -> !llvm.ptr<i32> {name = ""}
     %map2 = omp.map_info var_ptr(%5 : !llvm.ptr<i32>)   map_clauses(tofrom) capture(ByRef) -> !llvm.ptr<i32> {name = ""}
     %map3 = omp.map_info var_ptr(%7 : !llvm.ptr<i32>)   map_clauses(tofrom) capture(ByRef) -> !llvm.ptr<i32> {name = ""}
-    omp.target map_entries(%map1, %map2, %map3 : !llvm.ptr<i32>, !llvm.ptr<i32>, !llvm.ptr<i32>) {
-      %8 = llvm.load %3 : !llvm.ptr<i32>
-      %9 = llvm.load %5 : !llvm.ptr<i32>
+    omp.target map_entries(%map1 -> %arg0, %map2 -> %arg1, %map3 -> %arg2 : !llvm.ptr<i32>, !llvm.ptr<i32>, !llvm.ptr<i32>) {
+    ^bb0(%arg0: !llvm.ptr<i32>, %arg1: !llvm.ptr<i32>, %arg2: !llvm.ptr<i32>):
+      %8 = llvm.load %arg0 : !llvm.ptr<i32>
+      %9 = llvm.load %arg1 : !llvm.ptr<i32>
       %10 = llvm.add %8, %9  : i32
-      llvm.store %10, %7 : !llvm.ptr<i32>
+      llvm.store %10, %arg2 : !llvm.ptr<i32>
       omp.terminator
     }
     llvm.return
diff --git a/mlir/test/Target/LLVMIR/omptarget-region-llvm-target-device.mlir b/mlir/test/Target/LLVMIR/omptarget-region-llvm-target-device.mlir
index 01a3bd556294f3e..1032a8d8bc999bb 100644
--- a/mlir/test/Target/LLVMIR/omptarget-region-llvm-target-device.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-region-llvm-target-device.mlir
@@ -3,10 +3,11 @@
 // RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
 
 module attributes {omp.is_target_device = true} {
-  llvm.func @writeindex_omp_outline_0_(%arg0: !llvm.ptr<i32>, %arg1: !llvm.ptr<i32>) attributes {omp.outline_parent_name = "writeindex_"} {
-    %0 = omp.map_info var_ptr(%arg0 : !llvm.ptr<i32>)   map_clauses(tofrom) capture(ByRef) -> !llvm.ptr<i32> {name = ""}
-    %1 = omp.map_info var_ptr(%arg1 : !llvm.ptr<i32>)   map_clauses(tofrom) capture(ByRef) -> !llvm.ptr<i32> {name = ""}
-    omp.target   map_entries(%0, %1 : !llvm.ptr<i32>, !llvm.ptr<i32>) {
+  llvm.func @writeindex_omp_outline_0_(%val0: !llvm.ptr<i32>, %val1: !llvm.ptr<i32>) attributes {omp.outline_parent_name = "writeindex_"} {
+    %0 = omp.map_info var_ptr(%val0 : !llvm.ptr<i32>)   map_clauses(tofrom) capture(ByRef) -> !llvm.ptr<i32> {name = ""}
+    %1 = omp.map_info var_ptr(%val1 : !llvm.ptr<i32>)   map_clauses(tofrom) capture(ByRef) -> !llvm.ptr<i32> {name = ""}
+    omp.target   map_entries(%0 -> %arg0, %1 -> %arg1 : !llvm.ptr<i32>, !llvm.ptr<i32>) {
+    ^bb0(%arg0: !llvm.ptr<i32>, %arg1: !llvm.ptr<i32>):
       %2 = llvm.mlir.constant(20 : i32) : i32
       %3 = llvm.mlir.constant(10 : i32) : i32
       llvm.store %3, %arg0 : !llvm.ptr<i32>
diff --git a/mlir/test/Target/LLVMIR/omptarget-region-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-region-llvm.mlir
index 51506386d83782d..1c4a756eabef71e 100644
--- a/mlir/test/Target/LLVMIR/omptarget-region-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-region-llvm.mlir
@@ -15,11 +15,12 @@ module attributes {omp.is_target_device = false} {
     %map1 = omp.map_info var_ptr(%3 : !llvm.ptr<i32>)   map_clauses(tofrom) capture(ByRef) -> !llvm.ptr<i32> {name = ""}
     %map2 = omp.map_info var_ptr(%5 : !llvm.ptr<i32>)   map_clauses(tofrom) capture(ByRef) -> !llvm.ptr<i32> {name = ""}
     %map3 = omp.map_info var_ptr(%7 : !llvm.ptr<i32>)   map_clauses(tofrom) capture(ByRef) -> !llvm.ptr<i32> {name = ""}
-    omp.target map_entries(%map1, %map2, %map3 : !llvm.ptr<i32>, !llvm.ptr<i32>, !llvm.ptr<i32>) {
-      %8 = llvm.load %3 : !llvm.ptr<i32>
-      %9 = llvm.load %5 : !llvm.ptr<i32>
+    omp.target map_entries(%map1 -> %arg0, %map2 -> %arg1, %map3 -> %arg2 : !llvm.ptr<i32>, !llvm.ptr<i32>, !llvm.ptr<i32>) {
+    ^bb0(%arg0: !llvm.ptr<i32>, %arg1: !llvm.ptr<i32>, %arg2: !llvm.ptr<i32>):
+      %8 = llvm.load %arg0 : !llvm.ptr<i32>
+      %9 = llvm.load %arg1 : !llvm.ptr<i32>
       %10 = llvm.add %8, %9  : i32
-      llvm.store %10, %7 : !llvm.ptr<i32>
+      llvm.store %10, %arg2 : !llvm.ptr<i32>
       omp.terminator
     }
     llvm.return
diff --git a/mlir/test/Target/LLVMIR/omptarget-region-parallel-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-region-parallel-llvm.mlir
index 5741e44862d8b3f..80ebee56171735f 100644
--- a/mlir/test/Target/LLVMIR/omptarget-region-parallel-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-region-parallel-llvm.mlir
@@ -15,12 +15,13 @@ module attributes {omp.is_target_device = false} {
     %map1 = omp.map_info var_ptr(%3 : !llvm.ptr<i32>)   map_clauses(tofrom) capture(ByRef) -> !llvm.ptr<i32> {name = ""}
     %map2 = omp.map_info var_ptr(%5 : !llvm.ptr<i32>)   map_clauses(tofrom) capture(ByRef) -> !llvm.ptr<i32> {name = ""}
     %map3 = omp.map_info var_ptr(%7 : !llvm.ptr<i32>)   map_clauses(tofrom) capture(ByRef) -> !llvm.ptr<i32> {name = ""}
-    omp.target map_entries( %map1, %map2, %map3 : !llvm.ptr<i32>, !llvm.ptr<i32>, !llvm.ptr<i32>) {
+    omp.target map_entries( %map1 -> %arg0, %map2 -> %arg1, %map3 -> %arg2 : !llvm.ptr<i32>, !llvm.ptr<i32>, !llvm.ptr<i32>) {
+    ^bb0(%arg0: !llvm.ptr<i32>, %arg1: !llvm.ptr<i32>, %arg2: !llvm.ptr<i32>):
       omp.parallel {
-        %8 = llvm.load %3 : !llvm.ptr<i32>
-        %9 = llvm.load %5 : !llvm.ptr<i32>
+        %8 = llvm.load %arg0 : !llvm.ptr<i32>
+        %9 = llvm.load %arg1 : !llvm.ptr<i32>
         %10 = llvm.add %8, %9  : i32
-        llvm.store %10, %7 : !llvm.ptr<i32>
+        llvm.store %10, %arg2 : !llvm.ptr<i32>
         omp.terminator
         }
       omp.terminator

>From 56abd2ac98c3091e663316bd7fde717207fb4b40 Mon Sep 17 00:00:00 2001
From: Akash Banerjee <Akash.Banerjee at amd.com>
Date: Mon, 25 Sep 2023 12:31:30 +0100
Subject: [PATCH 3/3] [OpenMP][MLIR]OMPEarlyOutliningPass removal

This patch removes the OMPEarlyOutliningPass as it is no longer required. The implicit map operand capture has now been moved to the PFT lowering stage.
---
 .../flang/Optimizer/Transforms/Passes.h       |   2 -
 .../flang/Optimizer/Transforms/Passes.td      |  18 +-
 flang/include/flang/Tools/CLOptions.inc       |   4 +-
 flang/lib/Optimizer/Transforms/CMakeLists.txt |   1 -
 .../Transforms/OMPEarlyOutlining.cpp          | 303 ------------------
 .../OpenMP/FIR/omp-target-early-outlining.f90 |  89 -----
 .../Lower/OpenMP/function-filtering-2.f90     |   2 -
 .../test/Lower/OpenMP/function-filtering.f90  |   5 -
 8 files changed, 4 insertions(+), 420 deletions(-)
 delete mode 100644 flang/lib/Optimizer/Transforms/OMPEarlyOutlining.cpp
 delete mode 100644 flang/test/Lower/OpenMP/FIR/omp-target-early-outlining.f90

diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h
index 30d97be3800c191..92bc7246eca7005 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.h
+++ b/flang/include/flang/Optimizer/Transforms/Passes.h
@@ -75,8 +75,6 @@ std::unique_ptr<mlir::Pass>
 createAlgebraicSimplificationPass(const mlir::GreedyRewriteConfig &config);
 std::unique_ptr<mlir::Pass> createPolymorphicOpConversionPass();
 
-std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>>
-createOMPEarlyOutliningPass();
 std::unique_ptr<mlir::Pass> createOMPFunctionFilteringPass();
 std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>>
 createOMPMarkDeclareTargetPass();
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index 6d211a535b53f70..805f53fc4bb036a 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -16,7 +16,7 @@
 
 include "mlir/Pass/PassBase.td"
 
-class AbstractResultOptBase<string optExt, string operation> 
+class AbstractResultOptBase<string optExt, string operation>
   : Pass<"abstract-result-on-" # optExt # "-opt", operation> {
   let summary = "Convert fir.array, fir.box and fir.rec function result to "
                 "function argument";
@@ -297,8 +297,8 @@ def PolymorphicOpConversion : Pass<"fir-polymorphic-op", "::mlir::func::FuncOp">
   let summary =
     "Simplify operations on polymorphic types";
   let description = [{
-    This pass breaks up the lowering of operations on polymorphic types by 
-    introducing an intermediate FIR level that simplifies code geneation. 
+    This pass breaks up the lowering of operations on polymorphic types by
+    introducing an intermediate FIR level that simplifies code geneation.
   }];
   let constructor = "::fir::createPolymorphicOpConversionPass()";
   let dependentDialects = [
@@ -318,18 +318,6 @@ def LoopVersioning : Pass<"loop-versioning", "mlir::func::FuncOp"> {
   let dependentDialects = [ "fir::FIROpsDialect" ];
 }
 
-def OMPEarlyOutliningPass
-    : Pass<"omp-early-target-outlining", "mlir::ModuleOp"> {
-  let summary = "Outlines all target ops into separate functions";
-  let description = [{
-    This pass outlines all omp.target operations into individual functions.
-    It is invoked in the front end after the initial FIR has been constructed.
-    This pass is only needed when compiling for the target device to prevent
-    the optimizer to perform transforms across target region boundaries.
-  }];
-  let constructor = "::fir::createOMPEarlyOutliningPass()";
-}
-
 def OMPMarkDeclareTargetPass
     : Pass<"omp-mark-declare-target", "mlir::ModuleOp"> {
   let summary = "Marks all functions called by an OpenMP declare target function as declare target";
diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc
index 2ed716382feb43f..c452c023b4a80ce 100644
--- a/flang/include/flang/Tools/CLOptions.inc
+++ b/flang/include/flang/Tools/CLOptions.inc
@@ -274,10 +274,8 @@ inline void createHLFIRToFIRPassPipeline(
 inline void createOpenMPFIRPassPipeline(
     mlir::PassManager &pm, bool isTargetDevice) {
   pm.addPass(fir::createOMPMarkDeclareTargetPass());
-  if (isTargetDevice) {
-    pm.addPass(fir::createOMPEarlyOutliningPass());
+  if (isTargetDevice)
     pm.addPass(fir::createOMPFunctionFilteringPass());
-  }
 }
 
 #if !defined(FLANG_EXCLUDE_CODEGEN)
diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt
index 98314fa7a2087fe..03b67104a93b575 100644
--- a/flang/lib/Optimizer/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt
@@ -17,7 +17,6 @@ add_flang_library(FIRTransforms
   AddDebugFoundation.cpp
   PolymorphicOpConversion.cpp
   LoopVersioning.cpp
-  OMPEarlyOutlining.cpp
   OMPFunctionFiltering.cpp
   OMPMarkDeclareTarget.cpp
   VScaleAttr.cpp
diff --git a/flang/lib/Optimizer/Transforms/OMPEarlyOutlining.cpp b/flang/lib/Optimizer/Transforms/OMPEarlyOutlining.cpp
deleted file mode 100644
index 92fbdd0bbf5d4a1..000000000000000
--- a/flang/lib/Optimizer/Transforms/OMPEarlyOutlining.cpp
+++ /dev/null
@@ -1,303 +0,0 @@
-#include "flang/Optimizer/Dialect/FIRDialect.h"
-#include "flang/Optimizer/Dialect/FIROps.h"
-#include "flang/Optimizer/Dialect/FIRType.h"
-#include "flang/Optimizer/HLFIR/HLFIROps.h"
-#include "flang/Optimizer/Support/InternalNames.h"
-#include "flang/Optimizer/Transforms/Passes.h"
-#include "mlir/Dialect/Func/IR/FuncOps.h"
-#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
-#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
-#include "mlir/IR/BuiltinDialect.h"
-#include "mlir/IR/BuiltinOps.h"
-#include "mlir/IR/IRMapping.h"
-#include "mlir/IR/Operation.h"
-#include "mlir/IR/SymbolTable.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Support/LLVM.h"
-#include "mlir/Transforms/RegionUtils.h"
-#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
-
-namespace fir {
-#define GEN_PASS_DEF_OMPEARLYOUTLININGPASS
-#include "flang/Optimizer/Transforms/Passes.h.inc"
-} // namespace fir
-
-namespace {
-class OMPEarlyOutliningPass
-    : public fir::impl::OMPEarlyOutliningPassBase<OMPEarlyOutliningPass> {
-
-  std::string getOutlinedFnName(llvm::StringRef parentName, unsigned count) {
-    return std::string(parentName) + "_omp_outline_" + std::to_string(count);
-  }
-
-  // Given a value this function will iterate over an operators results
-  // and return the relevant index for the result the value corresponds to.
-  // There may be a simpler way to do this however.
-  static unsigned getResultIndex(mlir::Value value, mlir::Operation *op) {
-    for (unsigned i = 0; i < op->getNumResults(); ++i) {
-      if (op->getResult(i) == value)
-        return i;
-    }
-    return 0;
-  }
-
-  static bool isAddressOfGlobalDeclareTarget(mlir::Value value) {
-    if (fir::AddrOfOp addressOfOp =
-            mlir::dyn_cast_if_present<fir::AddrOfOp>(value.getDefiningOp()))
-      if (fir::GlobalOp gOp = mlir::dyn_cast_if_present<fir::GlobalOp>(
-              addressOfOp->getParentOfType<mlir::ModuleOp>().lookupSymbol(
-                  addressOfOp.getSymbol())))
-        if (auto declareTargetGlobal =
-                llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
-                    gOp.getOperation()))
-          if (declareTargetGlobal.isDeclareTarget())
-            return true;
-    return false;
-  }
-
-  // Currently used for cloning arguments that are nested. Should be
-  // extendable where required, perhaps via operation
-  // specialisation/overloading, if something needs specialised handling.
-  // NOTE: Results in duplication of some values that would otherwise be
-  // a single SSA value shared between operations, this is tidied up on
-  // lowering to some extent.
-  static mlir::Operation *
-  cloneArgAndChildren(mlir::OpBuilder &builder, mlir::Operation *op,
-                      llvm::SetVector<mlir::Value> &inputs,
-                      mlir::Block::BlockArgListType &newInputs) {
-    mlir::IRMapping valueMap;
-    for (mlir::Value opValue : op->getOperands()) {
-      if (opValue.getDefiningOp()) {
-        unsigned resIdx = getResultIndex(opValue, opValue.getDefiningOp());
-        valueMap.map(opValue,
-                     cloneArgAndChildren(builder, opValue.getDefiningOp(),
-                                         inputs, newInputs)
-                         ->getResult(resIdx));
-      } else {
-        for (auto inArg : llvm::zip(inputs, newInputs)) {
-          if (opValue == std::get<0>(inArg))
-            valueMap.map(opValue, std::get<1>(inArg));
-        }
-      }
-    }
-
-    return builder.clone(*op, valueMap);
-  }
-
-  static void cloneMapOpVariables(mlir::OpBuilder &builder,
-                                  mlir::IRMapping &valueMap,
-                                  mlir::IRMapping &mapInfoMap,
-                                  llvm::SetVector<mlir::Value> &inputs,
-                                  mlir::Block::BlockArgListType &newInputs,
-                                  mlir::Value varPtr) {
-    if (fir::BoxAddrOp boxAddrOp =
-            mlir::dyn_cast_if_present<fir::BoxAddrOp>(varPtr.getDefiningOp())) {
-      mlir::Value newV =
-          cloneArgAndChildren(builder, boxAddrOp, inputs, newInputs)
-              ->getResult(0);
-      mapInfoMap.map(varPtr, newV);
-      valueMap.map(boxAddrOp, newV);
-      return;
-    }
-
-    // Clone into the outlined function all hlfir.declare ops that define inputs
-    // to the target region and set up remapping of its inputs and outputs.
-    if (auto declareOp = mlir::dyn_cast_if_present<hlfir::DeclareOp>(
-            varPtr.getDefiningOp())) {
-      auto clone = llvm::cast<hlfir::DeclareOp>(
-          cloneArgAndChildren(builder, declareOp, inputs, newInputs));
-      mlir::Value newBase = clone.getBase();
-      mlir::Value newOrigBase = clone.getOriginalBase();
-      mapInfoMap.map(varPtr, newOrigBase);
-      valueMap.map(declareOp.getBase(), newBase);
-      valueMap.map(declareOp.getOriginalBase(), newOrigBase);
-      return;
-    }
-
-    if (isAddressOfGlobalDeclareTarget(varPtr)) {
-      fir::AddrOfOp addrOp =
-          mlir::dyn_cast<fir::AddrOfOp>(varPtr.getDefiningOp());
-      mlir::Value newV = builder.clone(*addrOp)->getResult(0);
-      mapInfoMap.map(varPtr, newV);
-      valueMap.map(addrOp, newV);
-      return;
-    }
-
-    for (auto inArg : llvm::zip(inputs, newInputs)) {
-      if (varPtr == std::get<0>(inArg))
-        mapInfoMap.map(varPtr, std::get<1>(inArg));
-    }
-  }
-
-  mlir::func::FuncOp outlineTargetOp(mlir::OpBuilder &builder,
-                                     mlir::omp::TargetOp &targetOp,
-                                     mlir::func::FuncOp &parentFunc,
-                                     unsigned count) {
-    // NOTE: once implicit captures are handled appropriately in the initial
-    // PFT lowering if it is possible, we can remove the usage of
-    // getUsedValuesDefinedAbove and instead just iterate over the target op's
-    // operands (or just the map arguments) and perhaps refactor this function
-    // a little.
-    // Collect inputs
-    llvm::SetVector<mlir::Value> inputs;
-    mlir::Region &targetRegion = targetOp.getRegion();
-    mlir::getUsedValuesDefinedAbove(targetRegion, inputs);
-
-    // Collect all map info. Even non-used maps must be collected to avoid ICEs.
-    for (mlir::Value oper : targetOp->getOperands()) {
-      if (auto mapEntry =
-              mlir::dyn_cast<mlir::omp::MapInfoOp>(oper.getDefiningOp())) {
-        if (!inputs.contains(mapEntry.getVarPtr()))
-          inputs.insert(mapEntry.getVarPtr());
-      }
-    }
-
-    // Filter out declare-target and map entries which are specially handled
-    // at the moment, so we do not wish these to end up as function arguments
-    // which would just be more noise in the IR.
-    llvm::SmallVector<mlir::Value> blockArgs;
-    for (llvm::SetVector<mlir::Value>::iterator iter = inputs.begin(); iter != inputs.end();) {
-      if (mlir::isa_and_nonnull<mlir::omp::MapInfoOp>(iter->getDefiningOp()) ||
-          isAddressOfGlobalDeclareTarget(*iter)) {
-        iter = inputs.erase(iter);
-      } else if (auto declareOp = mlir::dyn_cast_if_present<hlfir::DeclareOp>(
-                     iter->getDefiningOp())) {
-        // Gather hlfir.declare arguments to be added later, after the
-        // hlfir.declare operation itself has been removed as an input.
-        blockArgs.push_back(declareOp.getMemref());
-        if (mlir::Value shape = declareOp.getShape())
-          blockArgs.push_back(shape);
-        for (mlir::Value typeParam : declareOp.getTypeparams())
-          blockArgs.push_back(typeParam);
-        iter = inputs.erase(iter);
-      } else {
-        ++iter;
-      }
-    }
-
-    // Add function arguments to the list of inputs if they are used by an
-    // hlfir.declare operation.
-    for (mlir::Value arg : blockArgs) {
-      if (!arg.getDefiningOp() && !inputs.contains(arg))
-        inputs.insert(arg);
-    }
-
-    // Create new function and initialize
-    mlir::FunctionType funcType = builder.getFunctionType(
-        mlir::TypeRange(inputs.getArrayRef()), mlir::TypeRange());
-    std::string parentName(parentFunc.getName());
-    std::string funcName = getOutlinedFnName(parentName, count);
-    mlir::Location loc = targetOp.getLoc();
-    mlir::func::FuncOp newFunc =
-        mlir::func::FuncOp::create(loc, funcName, funcType);
-    mlir::Block *entryBlock = newFunc.addEntryBlock();
-    builder.setInsertionPointToStart(entryBlock);
-    mlir::Block::BlockArgListType newInputs = entryBlock->getArguments();
-
-    // Set the declare target information, the outlined function
-    // is always a host function.
-    if (auto parentDTOp = llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
-            parentFunc.getOperation()))
-      if (auto newDTOp = llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
-              newFunc.getOperation()))
-        newDTOp.setDeclareTarget(mlir::omp::DeclareTargetDeviceType::host,
-                                 parentDTOp.getDeclareTargetCaptureClause());
-
-    // Set the early outlining interface parent name
-    if (auto earlyOutlineOp =
-            llvm::dyn_cast<mlir::omp::EarlyOutliningInterface>(
-                newFunc.getOperation()))
-      earlyOutlineOp.setParentName(parentName);
-
-    // The value map for the newly generated Target Operation, we must
-    // remap most of the input.
-    mlir::IRMapping valueMap;
-
-    // Special handling for map, declare target and regular map variables
-    // are handled slightly differently for the moment, declare target has
-    // its addressOfOp cloned over, whereas we skip it for the regular map
-    // variables. We need knowledge of which global is linked to the map
-    // operation for declare target, whereas we aren't bothered for the
-    // regular map variables for the moment. We could treat both the same,
-    // however, cloning across the minimum for the moment to avoid
-    // optimisations breaking segments of the lowering seems prudent as this
-    // was the original intent of the pass.
-    for (mlir::Value oper : targetOp->getOperands()) {
-      if (auto mapEntry =
-              mlir::dyn_cast<mlir::omp::MapInfoOp>(oper.getDefiningOp())) {
-        mlir::IRMapping mapInfoMap;
-        for (mlir::Value bound : mapEntry.getBounds()) {
-          if (auto mapEntryBound = mlir::dyn_cast<mlir::omp::DataBoundsOp>(
-                  bound.getDefiningOp())) {
-            mapInfoMap.map(bound, cloneArgAndChildren(builder, mapEntryBound,
-                                                      inputs, newInputs)
-                                      ->getResult(0));
-          }
-        }
-
-        cloneMapOpVariables(builder, valueMap, mapInfoMap, inputs, newInputs,
-                            mapEntry.getVarPtr());
-
-        if (mapEntry.getVarPtrPtr())
-          cloneMapOpVariables(builder, valueMap, mapInfoMap, inputs, newInputs,
-                              mapEntry.getVarPtrPtr());
-
-        valueMap.map(
-            mapEntry,
-            builder.clone(*mapEntry.getOperation(), mapInfoMap)->getResult(0));
-      }
-    }
-
-    for (auto inArg : llvm::zip(inputs, newInputs))
-      valueMap.map(std::get<0>(inArg), std::get<1>(inArg));
-
-    // Clone the target op into the new function
-    builder.clone(*(targetOp.getOperation()), valueMap);
-
-    // Create return op
-    builder.create<mlir::func::ReturnOp>(loc);
-
-    return newFunc;
-  }
-
-  // Returns true if a target region was found in the function.
-  bool outlineTargetOps(mlir::OpBuilder &builder,
-                        mlir::func::FuncOp &functionOp,
-                        mlir::ModuleOp &moduleOp,
-                        llvm::SmallVectorImpl<mlir::func::FuncOp> &newFuncs) {
-    unsigned count = 0;
-    for (auto TargetOp : functionOp.getOps<mlir::omp::TargetOp>()) {
-      mlir::func::FuncOp outlinedFunc =
-          outlineTargetOp(builder, TargetOp, functionOp, count);
-      newFuncs.push_back(outlinedFunc);
-      count++;
-    }
-    return count > 0;
-  }
-
-  void runOnOperation() override {
-    mlir::ModuleOp moduleOp = getOperation();
-    mlir::MLIRContext *context = &getContext();
-    mlir::OpBuilder builder(context);
-    llvm::SmallVector<mlir::func::FuncOp> newFuncs;
-
-    for (auto functionOp :
-         llvm::make_early_inc_range(moduleOp.getOps<mlir::func::FuncOp>())) {
-      bool outlined = outlineTargetOps(builder, functionOp, moduleOp, newFuncs);
-      if (outlined)
-        functionOp.erase();
-    }
-
-    for (auto newFunc : newFuncs)
-      moduleOp.push_back(newFunc);
-  }
-};
-
-} // namespace
-
-namespace fir {
-std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>>
-createOMPEarlyOutliningPass() {
-  return std::make_unique<OMPEarlyOutliningPass>();
-}
-} // namespace fir
diff --git a/flang/test/Lower/OpenMP/FIR/omp-target-early-outlining.f90 b/flang/test/Lower/OpenMP/FIR/omp-target-early-outlining.f90
deleted file mode 100644
index eac19f889f433cd..000000000000000
--- a/flang/test/Lower/OpenMP/FIR/omp-target-early-outlining.f90
+++ /dev/null
@@ -1,89 +0,0 @@
-!REQUIRES: amdgpu-registered-target
-
-!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-fir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s
-!RUN: %flang_fc1 -triple x86_64-unknown-linux-gnu -emit-fir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s
-!RUN: bbc -emit-fir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s 
-!RUN: bbc -emit-fir -fopenmp -fopenmp-is-gpu -fopenmp-is-target-device %s -o - | FileCheck %s 
-
-!CHECK: func.func @_QPtarget_function
-
-!CHECK:  func.func @_QPwrite_index_omp_outline_0(%[[ARG0:.*]]: !fir.ref<i32>) attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>, omp.outline_parent_name = "_QPwrite_index"} {
-!CHECK-NEXT: %[[map_info0:.*]] = omp.map_info var_ptr(%[[ARG0]]{{.*}}
-!CHECK-NEXT: omp.target  map_entries(%[[map_info0]]{{.*}} {
-!CHECK: %[[CONSTANT_VALUE_10:.*]] = arith.constant 10 : i32
-!CHECK: fir.store %[[CONSTANT_VALUE_10]] to %[[ARG0]] : !fir.ref<i32>
-!CHECK: omp.terminator
-!CHECK-NEXT: }
-!CHECK-NEXT: return
-
-!CHECK:  func.func @_QPwrite_index_omp_outline_1(%[[ARG1:.*]]: !fir.ref<i32>) attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>, omp.outline_parent_name = "_QPwrite_index"} {
-!CHECK-NEXT: %[[map_info1:.*]] = omp.map_info var_ptr(%[[ARG1]]{{.*}}
-!CHECK-NEXT: omp.target  map_entries(%[[map_info1]]{{.*}} {
-!CHECK: %[[CONSTANT_VALUE_20:.*]] = arith.constant 20 : i32
-!CHECK: fir.store %[[CONSTANT_VALUE_20]] to %[[ARG1]] : !fir.ref<i32>
-!CHECK: omp.terminator
-!CHECK-NEXT: }
-!CHECK-NEXT: return
-
-
-SUBROUTINE WRITE_INDEX(INT_ARRAY)
-        INTEGER :: INT_ARRAY(*)
-        INTEGER :: NEW_LEN
-!$omp target map(from:new_len)
-        NEW_LEN = 10
-!$omp end target
-!$omp target map(from:new_len)
-        NEW_LEN = 20
-!$omp end target
-        do INDEX_ = 1, NEW_LEN
-                INT_ARRAY(INDEX_) = INDEX_
-        end do
-end subroutine WRITE_INDEX
-
-SUBROUTINE TARGET_FUNCTION()
-!$omp declare target
-END
-
-!CHECK: func.func @_QParray_bounds_omp_outline_0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<!fir.array<10xi32>>) attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>, omp.outline_parent_name = "_QParray_bounds"} {
-!CHECK: %[[C1:.*]] = arith.constant 1 : index
-!CHECK: %[[C4:.*]] = arith.constant 4 : index
-!CHECK: %[[C1_0:.*]] = arith.constant 1 : index
-!CHECK: %[[C1_1:.*]] = arith.constant 1 : index
-!CHECK: %[[BOUNDS:.*]] = omp.bounds   lower_bound(%[[C1]] : index) upper_bound(%[[C4]] : index) stride(%[[C1_1]] : index) start_idx(%[[C1_1]] : index)
-!CHECK: %[[ENTRY:.*]] = omp.map_info var_ptr(%[[ARG1]] : !fir.ref<!fir.array<10xi32>>)   map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref<!fir.array<10xi32>> {name = "sp_write(2:5)"}
-!CHECK: omp.target   map_entries(%[[ENTRY]] : !fir.ref<!fir.array<10xi32>>) {
-!CHECK:  %c2_i32 = arith.constant 2 : i32
-!CHECK:  %2 = fir.convert %c2_i32 : (i32) -> index
-!CHECK:  %c5_i32 = arith.constant 5 : i32
-!CHECK:  %3 = fir.convert %c5_i32 : (i32) -> index
-!CHECK:  %c1_2 = arith.constant 1 : index
-!CHECK:  %4 = fir.convert %2 : (index) -> i32
-!CHECK:  %5:2 = fir.do_loop %arg2 = %2 to %3 step %c1_2 iter_args(%arg3 = %4) -> (index, i32) {
-!CHECK:    fir.store %arg3 to %[[ARG0]] : !fir.ref<i32>
-!CHECK:    %6 = fir.load %[[ARG0]] : !fir.ref<i32>
-!CHECK:    %7 = fir.load %[[ARG0]] : !fir.ref<i32>
-!CHECK:    %8 = fir.convert %7 : (i32) -> i64
-!CHECK:    %c1_i64 = arith.constant 1 : i64
-!CHECK:    %9 = arith.subi %8, %c1_i64 : i64
-!CHECK:    %10 = fir.coordinate_of %[[ARG1]], %9 : (!fir.ref<!fir.array<10xi32>>, i64) -> !fir.ref<i32>
-!CHECK:    fir.store %6 to %10 : !fir.ref<i32>
-!CHECK:    %11 = arith.addi %arg2, %c1_2 : index
-!CHECK:    %12 = fir.convert %c1_2 : (index) -> i32
-!CHECK:    %13 = fir.load %[[ARG0]] : !fir.ref<i32>
-!CHECK:    %14 = arith.addi %13, %12 : i32
-!CHECK:    fir.result %11, %14 : index, i32
-!CHECK:  }
-!CHECK: fir.store %5#1 to %[[ARG0]] : !fir.ref<i32>
-!CHECK:  omp.terminator
-!CHECK: }
-!CHECK:return
-!CHECK:}
-
-SUBROUTINE ARRAY_BOUNDS()
-        INTEGER :: sp_write(10) = (/0,0,0,0,0,0,0,0,0,0/)
-!$omp target map(tofrom:sp_write(2:5))
-        do i = 2, 5
-                sp_write(i) = i
-        end do
-!$omp end target
-end subroutine ARRAY_BOUNDS
diff --git a/flang/test/Lower/OpenMP/function-filtering-2.f90 b/flang/test/Lower/OpenMP/function-filtering-2.f90
index 8219be5ad1e40ca..17cd0d44c01b4bc 100644
--- a/flang/test/Lower/OpenMP/function-filtering-2.f90
+++ b/flang/test/Lower/OpenMP/function-filtering-2.f90
@@ -26,9 +26,7 @@ subroutine no_declaretarget()
 end subroutine no_declaretarget
 
 ! MLIR-HOST: func.func @{{.*}}main(
-! MLIR-HOST-NOT: func.func @{{.*}}main_omp_outline{{.*}}()
 ! MLIR-DEVICE-NOT: func.func @{{.*}}main(
-! MLIR-DEVICE: func.func @{{.*}}main_omp_outline{{.*}}() attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>, omp.outline_parent_name = "_QQmain"}
 ! MLIR-ALL: return
 
 ! LLVM-HOST: define {{.*}} @{{.*}}main{{.*}}(
diff --git a/flang/test/Lower/OpenMP/function-filtering.f90 b/flang/test/Lower/OpenMP/function-filtering.f90
index 3de14aa4709fc4c..e550348e50692c5 100644
--- a/flang/test/Lower/OpenMP/function-filtering.f90
+++ b/flang/test/Lower/OpenMP/function-filtering.f90
@@ -34,14 +34,9 @@ end function host_fn
 
 ! MLIR-HOST: func.func @{{.*}}target_subr(
 ! MLIR-HOST: return
-! MLIR-HOST-NOT: func.func @{{.*}}target_subr_omp_outline_0(
-! MLIR-DEVICE-NOT: func.func @{{.*}}target_subr(
-! MLIR-DEVICE: func.func @{{.*}}target_subr_omp_outline_0(
 ! MLIR-DEVICE: return
 
-! LLVM-ALL-NOT: define {{.*}} @{{.*}}target_subr_omp_outline_0{{.*}}(
 ! LLVM-HOST: define {{.*}} @{{.*}}target_subr{{.*}}(
-! LLVM-DEVICE-NOT: {{.*}} @{{.*}}target_subr{{.*}}(
 ! LLVM-ALL: define {{.*}} @__omp_offloading_{{.*}}_{{.*}}_target_subr__{{.*}}(
 subroutine target_subr(x)
   integer, intent(out) :: x



More information about the flang-commits mailing list