[Openmp-commits] [openmp] 6a62707 - [Flang][OpenMP][MLIR] Initial array section mapping MLIR -> LLVM-IR lowering utilising omp.bounds (#68689)

Mon Oct 30 08:00:28 PDT 2023

Author: agozillon
Date: 2023-10-30T16:00:23+01:00
New Revision: 6a62707c048e16ce9bad37ed8e3520799139436b

URL: https://github.com/llvm/llvm-project/commit/6a62707c048e16ce9bad37ed8e3520799139436b
DIFF: https://github.com/llvm/llvm-project/commit/6a62707c048e16ce9bad37ed8e3520799139436b.diff

LOG: [Flang][OpenMP][MLIR] Initial array section mapping MLIR -> LLVM-IR lowering utilising omp.bounds (#68689)

This patch seeks to add initial lowering of OpenMP array sections within
target region map clauses from MLIR to LLVM IR.

This patch seeks to support fixed sized contiguous (don't think OpenMP
supports anything other than contiguous sections from my reading but i
could be wrong) arrays initially, before looking toward assumed size and
shaped arrays. The patch also currently does not include stride, it's
left for future work.

Although, assumed size works in some fashion (dummy arguments) with some
minor alterations to the OMPEarlyOutliner, so it is possible changes
made in the IsolatedFromAbove series may allow this to work with no
further required patches.

It utilises the generated omp.bounds to calculate the size of the mapped
OpenMP array (both for sectioned and un-sectioned arrays) as well as the
offset to be passed to the kernel argument structure.

Alongside these changes some refactoring of how map data is handled is
attempted, using a new MapData structure to keep track of information
utilised in the lowering of mapped values.

The initial addition of a more complex createDeviceArgumentAccessor that
utilises capture kinds similarly to (and loosely based on) Clang to
generate different kernel argument accesses is also added.

A similar function for altering how the kernel argument is passed to the
kernel argument structure on the host is also utilised
(createAlteredByCaptureMap), which allows modification of the
pointer/basePointer based on their capture (and bounds information).
It's of note ByRef, is the default for explicit mappings and ByCopy will
be the default for implicit captures, so the former is currently tested
in this patch and the latter is not for the moment.

Added: 
    mlir/test/Target/LLVMIR/omptarget-array-sectioning-host.mlir
    mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir
    mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir
    openmp/libomptarget/test/offloading/fortran/basic-target-region-1D-array-section.f90
    openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array-section.f90
    openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array.f90
    openmp/libomptarget/test/offloading/fortran/basic-target-region-array.f90

Modified: 
    mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
    mlir/test/Target/LLVMIR/omptarget-llvm.mlir
    mlir/test/Target/LLVMIR/omptarget-region-parallel-llvm.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 875ce11391587ed..67875f668d4d3e6 100644

--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1537,13 +1537,6 @@ convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder,
   return success();
 }
 
-int64_t getSizeInBytes(DataLayout &DL, const Type &type, const Type &eleType) {
-  if (isa<LLVM::LLVMPointerType>(type))
-    return DL.getTypeSize(eleType);
-
-  return 0;
-}
-
 static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
 convertToDeviceClauseKind(mlir::omp::DeclareTargetDeviceType deviceClause) {
   switch (deviceClause) {
@@ -1638,13 +1631,141 @@ getRefPtrIfDeclareTarget(mlir::Value value,
   return nullptr;
 }
 
+// A small helper structure to contain data gathered
+// for map lowering and coalese it into one area and
+// avoiding extra computations such as searches in the
+// llvm module for lowered mapped varibles or checking
+// if something is declare target (and retrieving the
+// value) more than neccessary.
+struct MapInfoData : llvm::OpenMPIRBuilder::MapInfosTy {
+  llvm::SmallVector<bool, 4> IsDeclareTarget;
+  llvm::SmallVector<mlir::Operation *, 4> MapClause;
+  llvm::SmallVector<llvm::Value *, 4> OriginalValue;
+  // Stripped off array/pointer to get the underlying
+  // element type
+  llvm::SmallVector<llvm::Type *, 4> BaseType;
+
+  /// Append arrays in \a CurInfo.
+  void append(MapInfoData &CurInfo) {
+    IsDeclareTarget.append(CurInfo.IsDeclareTarget.begin(),
+                           CurInfo.IsDeclareTarget.end());
+    MapClause.append(CurInfo.MapClause.begin(), CurInfo.MapClause.end());
+    OriginalValue.append(CurInfo.OriginalValue.begin(),
+                         CurInfo.OriginalValue.end());
+    BaseType.append(CurInfo.BaseType.begin(), CurInfo.BaseType.end());
+    llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
+  }
+};
+
+uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy, DataLayout &dl) {
+  if (auto nestedArrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(
+          arrTy.getElementType()))
+    return getArrayElementSizeInBits(nestedArrTy, dl);
+  return dl.getTypeSizeInBits(arrTy.getElementType());
+}
+
+// This function calculates the size to be offloaded for a specified type, given
+// its associated map clause (which can contain bounds information which affects
+// the total size), this size is calculated based on the underlying element type
+// e.g. given a 1-D array of ints, we will calculate the size from the integer
+// type * number of elements in the array. This size can be used in other
+// calculations but is ultimately used as an argument to the OpenMP runtimes
+// kernel argument structure which is generated through the combinedInfo data
+// structures.
+// This function is somewhat equivalent to Clang's getExprTypeSize inside of
+// CGOpenMPRuntime.cpp.
+llvm::Value *getSizeInBytes(DataLayout &dl, const mlir::Type &type,
+                            Operation *clauseOp, llvm::IRBuilderBase &builder,
+                            LLVM::ModuleTranslation &moduleTranslation) {
+  // utilising getTypeSizeInBits instead of getTypeSize as getTypeSize gives
+  // the size in inconsistent byte or bit format.
+  uint64_t underlyingTypeSzInBits = dl.getTypeSizeInBits(type);
+  if (auto arrTy = llvm::dyn_cast_if_present<LLVM::LLVMArrayType>(type)) {
+    underlyingTypeSzInBits = getArrayElementSizeInBits(arrTy, dl);
+  }
+
+  if (auto memberClause =
+          mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(clauseOp)) {
+    // This calculates the size to transfer based on bounds and the underlying
+    // element type, provided bounds have been specified (Fortran
+    // pointers/allocatables/target and arrays that have sections specified fall
+    // into this as well).
+    if (!memberClause.getBounds().empty()) {
+      llvm::Value *elementCount = builder.getInt64(1);
+      for (auto bounds : memberClause.getBounds()) {
+        if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::DataBoundsOp>(
+                bounds.getDefiningOp())) {
+          // The below calculation for the size to be mapped calculated from the
+          // map_info's bounds is: (elemCount * [UB - LB] + 1), later we
+          // multiply by the underlying element types byte size to get the full
+          // size to be offloaded based on the bounds
+          elementCount = builder.CreateMul(
+              elementCount,
+              builder.CreateAdd(
+                  builder.CreateSub(
+                      moduleTranslation.lookupValue(boundOp.getUpperBound()),
+                      moduleTranslation.lookupValue(boundOp.getLowerBound())),
+                  builder.getInt64(1)));
+        }
+      }
+
+      // The size in bytes x number of elements, the sizeInBytes stored is
+      // the underyling types size, e.g. if ptr<i32>, it'll be the i32's
+      // size, so we do some on the fly runtime math to get the size in
+      // bytes from the extent (ub - lb) * sizeInBytes. NOTE: This may need
+      // some adjustment for members with more complex types.
+      return builder.CreateMul(elementCount,
+                               builder.getInt64(underlyingTypeSzInBits / 8));
+    }
+  }
+
+  return builder.getInt64(underlyingTypeSzInBits / 8);
+}
+
+void collectMapDataFromMapOperands(MapInfoData &mapData,
+                                   llvm::SmallVectorImpl<Value> &mapOperands,
+                                   LLVM::ModuleTranslation &moduleTranslation,
+                                   DataLayout &dl,
+                                   llvm::IRBuilderBase &builder) {
+  for (mlir::Value mapValue : mapOperands) {
+    assert(mlir::isa<mlir::omp::MapInfoOp>(mapValue.getDefiningOp()) &&
+           "missing map info operation or incorrect map info operation type");
+    if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
+            mapValue.getDefiningOp())) {
+      mapData.OriginalValue.push_back(
+          moduleTranslation.lookupValue(mapOp.getVarPtr()));
+      mapData.Pointers.push_back(mapData.OriginalValue.back());
+
+      if (llvm::Value *refPtr = getRefPtrIfDeclareTarget(
+              mapOp.getVarPtr(), moduleTranslation)) { // declare target
+        mapData.IsDeclareTarget.push_back(true);
+        mapData.BasePointers.push_back(refPtr);
+      } else { // regular mapped variable
+        mapData.IsDeclareTarget.push_back(false);
+        mapData.BasePointers.push_back(mapData.OriginalValue.back());
+      }
+
+      mapData.Sizes.push_back(getSizeInBytes(dl, mapOp.getVarType(), mapOp,
+                                             builder, moduleTranslation));
+      mapData.BaseType.push_back(
+          moduleTranslation.convertType(mapOp.getVarType()));
+      mapData.MapClause.push_back(mapOp.getOperation());
+      mapData.Types.push_back(
+          llvm::omp::OpenMPOffloadMappingFlags(mapOp.getMapType().value()));
+      mapData.Names.push_back(LLVM::createMappingInformation(
+          mapOp.getLoc(), *moduleTranslation.getOpenMPBuilder()));
+      mapData.DevicePointers.push_back(
+          llvm::OpenMPIRBuilder::DeviceInfoTy::None);
+    }
+  }
+}
+
 // Generate all map related information and fill the combinedInfo.
 static void genMapInfos(llvm::IRBuilderBase &builder,
                         LLVM::ModuleTranslation &moduleTranslation,
-                        DataLayout &DL,
+                        DataLayout &dl,
                         llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo,
-                        const SmallVector<Value> &mapOperands,
-                        const ArrayAttr &mapTypes,
+                        MapInfoData &mapData,
                         const SmallVector<Value> &devPtrOperands = {},
                         const SmallVector<Value> &devAddrOperands = {},
                         bool isTargetParams = false) {
@@ -1659,58 +1780,39 @@ static void genMapInfos(llvm::IRBuilderBase &builder,
     combinedInfo.Names.clear();
   };
 
-  auto findMapInfo = [&combinedInfo](llvm::Value *val, unsigned &index) {
-    index = 0;
-    for (auto basePtr : combinedInfo.BasePointers) {
-      if (basePtr == val)
-        return true;
-      index++;
-    }
-    return false;
-  };
-
-  unsigned index = 0;
-  for (const auto &mapOp : mapOperands) {
-    // Unlike dev_ptr and dev_addr operands these map operands point
-    // to a map entry operation which contains further information
-    // on the variable being mapped and how it should be mapped.
-    auto mapInfoOp =
-        mlir::dyn_cast<mlir::omp::MapInfoOp>(mapOp.getDefiningOp());
-
-    // TODO: Only LLVMPointerTypes are handled.
-    if (!mapInfoOp.getType().isa<LLVM::LLVMPointerType>())
-      return fail();
-
-    llvm::Value *mapOpValue =
-        moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
-
-    llvm::Value *refPtr =
-        getRefPtrIfDeclareTarget(mapInfoOp.getVarPtr(), moduleTranslation);
-
-    combinedInfo.BasePointers.emplace_back(refPtr ? refPtr : mapOpValue);
-    combinedInfo.Pointers.emplace_back(mapOpValue);
-    combinedInfo.DevicePointers.emplace_back(
-        llvm::OpenMPIRBuilder::DeviceInfoTy::None);
-    combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
-        mapInfoOp.getVarPtr().getLoc(), *ompBuilder));
-
-    auto mapFlag = llvm::omp::OpenMPOffloadMappingFlags(
-        mapTypes[index].cast<IntegerAttr>().getUInt());
-
+  // We operate under the assumption that all vectors that are
+  // required in MapInfoData are of equal lengths (either filled with
+  // default constructed data or appropiate information) so we can
+  // utilise the size from any component of MapInfoData, if we can't
+  // something is missing from the initial MapInfoData construction.
+  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
     // Declare Target Mappings are excluded from being marked as
     // OMP_MAP_TARGET_PARAM as they are not passed as parameters, they're marked
     // with OMP_MAP_PTR_AND_OBJ instead.
-    if (refPtr)
+    auto mapFlag = mapData.Types[i];
+    if (mapData.IsDeclareTarget[i])
       mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
     else if (isTargetParams)
       mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
 
+    combinedInfo.BasePointers.emplace_back(mapData.BasePointers[i]);
+    combinedInfo.Pointers.emplace_back(mapData.Pointers[i]);
+    combinedInfo.DevicePointers.emplace_back(mapData.DevicePointers[i]);
+    combinedInfo.Names.emplace_back(mapData.Names[i]);
     combinedInfo.Types.emplace_back(mapFlag);
-    combinedInfo.Sizes.emplace_back(builder.getInt64(getSizeInBytes(
-        DL, mapInfoOp.getVarPtr().getType(), mapInfoOp.getVarType())));
-    index++;
+    combinedInfo.Sizes.emplace_back(mapData.Sizes[i]);
   }
 
+  auto findMapInfo = [&combinedInfo](llvm::Value *val, unsigned &index) {
+    index = 0;
+    for (llvm::Value *basePtr : combinedInfo.BasePointers) {
+      if (basePtr == val)
+        return true;
+      index++;
+    }
+    return false;
+  };
+
   auto addDevInfos = [&, fail](auto devOperands, auto devOpType) -> void {
     for (const auto &devOp : devOperands) {
       // TODO: Only LLVMPointerTypes are handled.
@@ -1756,19 +1858,6 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
 
   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
 
-  auto getMapTypes = [](mlir::OperandRange mapOperands,
-                        mlir::MLIRContext *ctx) {
-    SmallVector<mlir::Attribute> mapTypes;
-    for (auto mapValue : mapOperands) {
-      if (mapValue.getDefiningOp()) {
-        auto mapOp =
-            mlir::dyn_cast<mlir::omp::MapInfoOp>(mapValue.getDefiningOp());
-        mapTypes.push_back(mapOp.getMapTypeAttr());
-      }
-    }
-    return mlir::ArrayAttr::get(ctx, mapTypes);
-  };
-
   LogicalResult result =
       llvm::TypeSwitch<Operation *, LogicalResult>(op)
           .Case([&](omp::DataOp dataOp) {
@@ -1782,8 +1871,6 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
                   deviceID = intAttr.getInt();
 
             mapOperands = dataOp.getMapOperands();
-            mapTypes =
-                getMapTypes(dataOp.getMapOperands(), dataOp->getContext());
             useDevPtrOperands = dataOp.getUseDevicePtr();
             useDevAddrOperands = dataOp.getUseDeviceAddr();
             return success();
@@ -1802,8 +1889,6 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
                   deviceID = intAttr.getInt();
             RTLFn = llvm::omp::OMPRTL___tgt_target_data_begin_mapper;
             mapOperands = enterDataOp.getMapOperands();
-            mapTypes = getMapTypes(enterDataOp.getMapOperands(),
-                                   enterDataOp->getContext());
             return success();
           })
           .Case([&](omp::ExitDataOp exitDataOp) {
@@ -1821,8 +1906,6 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
 
             RTLFn = llvm::omp::OMPRTL___tgt_target_data_end_mapper;
             mapOperands = exitDataOp.getMapOperands();
-            mapTypes = getMapTypes(exitDataOp.getMapOperands(),
-                                   exitDataOp->getContext());
             return success();
           })
           .Default([&](Operation *op) {
@@ -1835,17 +1918,20 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
 
   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
 
+  MapInfoData mapData;
+  collectMapDataFromMapOperands(mapData, mapOperands, moduleTranslation, DL,
+                                builder);
+
   // Fill up the arrays with all the mapped variables.
   llvm::OpenMPIRBuilder::MapInfosTy combinedInfo;
   auto genMapInfoCB =
       [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
     builder.restoreIP(codeGenIP);
-    if (auto DataOp = dyn_cast<omp::DataOp>(op)) {
-      genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapOperands,
-                  mapTypes, useDevPtrOperands, useDevAddrOperands);
+    if (auto dataOp = dyn_cast<omp::DataOp>(op)) {
+      genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapData,
+                  useDevPtrOperands, useDevAddrOperands);
     } else {
-      genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapOperands,
-                  mapTypes);
+      genMapInfos(builder, moduleTranslation, DL, combinedInfo, mapData);
     }
     return combinedInfo;
   };
@@ -1997,61 +2083,232 @@ static bool targetOpSupported(Operation &opInst) {
 }
 
 static void
-handleDeclareTargetMapVar(llvm::ArrayRef<Value> mapOperands,
+handleDeclareTargetMapVar(MapInfoData &mapData,
                           LLVM::ModuleTranslation &moduleTranslation,
                           llvm::IRBuilderBase &builder) {
-  for (const mlir::Value &mapOp : mapOperands) {
-    auto mapInfoOp =
-        mlir::dyn_cast<mlir::omp::MapInfoOp>(mapOp.getDefiningOp());
-    llvm::Value *mapOpValue =
-        moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
-    if (auto *declareTarget = getRefPtrIfDeclareTarget(mapInfoOp.getVarPtr(),
-                                                       moduleTranslation)) {
-      // The user's iterator will get invalidated if we modify an element,
+  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
+    // In the case of declare target mapped variables, the basePointer is
+    // the reference pointer generated by the convertDeclareTargetAttr
+    // method. Whereas the kernelValue is the original variable, so for
+    // the device we must replace all uses of this original global variable
+    // (stored in kernelValue) with the reference pointer (stored in
+    // basePointer for declare target mapped variables), as for device the
+    // data is mapped into this reference pointer and should be loaded
+    // from it, the original variable is discarded. On host both exist and
+    // metadata is generated (elsewhere in the convertDeclareTargetAttr)
+    // function to link the two variables in the runtime and then both the
+    // reference pointer and the pointer are assigned in the kernel argument
+    // structure for the host.
+    if (mapData.IsDeclareTarget[i]) {
+      // The users iterator will get invalidated if we modify an element,
       // so we populate this vector of uses to alter each user on an individual
       // basis to emit its own load (rather than one load for all).
       llvm::SmallVector<llvm::User *> userVec;
-      for (llvm::User *user : mapOpValue->users())
+      for (llvm::User *user : mapData.OriginalValue[i]->users())
         userVec.push_back(user);
 
       for (llvm::User *user : userVec) {
         if (auto *insn = dyn_cast<llvm::Instruction>(user)) {
-          auto *load = builder.CreateLoad(
-              moduleTranslation.convertType(mapInfoOp.getVarPtr().getType()),
-              declareTarget);
+          auto *load = builder.CreateLoad(mapData.BasePointers[i]->getType(),
+                                          mapData.BasePointers[i]);
           load->moveBefore(insn);
-          user->replaceUsesOfWith(mapOpValue, load);
+          user->replaceUsesOfWith(mapData.OriginalValue[i], load);
         }
       }
     }
   }
 }
 
+// The createDeviceArgumentAccessor function generates
+// instructions for retrieving (acessing) kernel
+// arguments inside of the device kernel for use by
+// the kernel. This enables 
diff erent semantics such as
+// the creation of temporary copies of data allowing
+// semantics like read-only/no host write back kernel
+// arguments.
+//
+// This currently implements a very light version of Clang's
+// EmitParmDecl's handling of direct argument handling as well
+// as a portion of the argument access generation based on
+// capture types found at the end of emitOutlinedFunctionPrologue
+// in Clang. The indirect path handling of EmitParmDecl's may be
+// required for future work, but a direct 1-to-1 copy doesn't seem
+// possible as the logic is rather scattered throughout Clang's
+// lowering and perhaps we wish to deviate slightly.
+//
+// \param mapData - A container containing vectors of information
+// corresponding to the input argument, which should have a
+// corresponding entry in the MapInfoData containers
+// OrigialValue's.
+// \param arg - This is the generated kernel function argument that
+// corresponds to the passed in input argument. We generated 
diff erent
+// accesses of this Argument, based on capture type and other Input
+// related information.
+// \param input - This is the host side value that will be passed to
+// the kernel i.e. the kernel input, we rewrite all uses of this within
+// the kernel (as we generate the kernel body based on the target's region
+// which maintians references to the original input) to the retVal argument
+// apon exit of this function inside of the OMPIRBuilder. This interlinks
+// the kernel argument to future uses of it in the function providing
+// appropriate "glue" instructions inbetween.
+// \param retVal - This is the value that all uses of input inside of the
+// kernel will be re-written to, the goal of this function is to generate
+// an appropriate location for the kernel argument to be accessed from,
+// e.g. ByRef will result in a temporary allocation location and then
+// a store of the kernel argument into this allocated memory which
+// will then be loaded from, ByCopy will use the allocated memory
+// directly.
 static llvm::IRBuilderBase::InsertPoint
-createDeviceArgumentAccessor(llvm::Argument &arg, llvm::Value *input,
-                             llvm::Value *&retVal, llvm::IRBuilderBase &builder,
+createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg,
+                             llvm::Value *input, llvm::Value *&retVal,
+                             llvm::IRBuilderBase &builder,
                              llvm::OpenMPIRBuilder &ompBuilder,
                              LLVM::ModuleTranslation &moduleTranslation,
                              llvm::IRBuilderBase::InsertPoint allocaIP,
                              llvm::IRBuilderBase::InsertPoint codeGenIP) {
   builder.restoreIP(allocaIP);
 
-  llvm::Value *addr =
+  mlir::omp::VariableCaptureKind capture =
+      mlir::omp::VariableCaptureKind::ByRef;
+  llvm::Type *inputType = input->getType();
+
+  // Find the associated MapInfoData entry for the current input
+  for (size_t i = 0; i < mapData.MapClause.size(); ++i)
+    if (mapData.OriginalValue[i] == input) {
+      if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
+              mapData.MapClause[i])) {
+        capture = mapOp.getMapCaptureType().value_or(
+            mlir::omp::VariableCaptureKind::ByRef);
+      }
+
+      inputType = mapData.BaseType[i];
+      break;
+    }
+
+  unsigned int allocaAS = ompBuilder.M.getDataLayout().getAllocaAddrSpace();
+  unsigned int defaultAS =
+      ompBuilder.M.getDataLayout().getProgramAddressSpace();
+
+  // Create the alloca for the argument the current point.
+  llvm::Value *v =
       builder.CreateAlloca(arg.getType()->isPointerTy()
                                ? arg.getType()
                                : llvm::Type::getInt64Ty(builder.getContext()),
                            ompBuilder.M.getDataLayout().getAllocaAddrSpace());
-  llvm::Value *addrAscast =
-      arg.getType()->isPointerTy()
-          ? builder.CreatePointerBitCastOrAddrSpaceCast(addr, input->getType())
-          : addr;
 
-  builder.CreateStore(&arg, addrAscast);
+  if (allocaAS != defaultAS && arg.getType()->isPointerTy()) {
+    v = builder.CreatePointerBitCastOrAddrSpaceCast(
+        v, arg.getType()->getPointerTo(defaultAS));
+  }
+
+  builder.CreateStore(&arg, v);
+
   builder.restoreIP(codeGenIP);
-  retVal = builder.CreateLoad(arg.getType(), addrAscast);
+
+  switch (capture) {
+  case mlir::omp::VariableCaptureKind::ByCopy: {
+    // RHS of || aims to ignore conversions like int -> uint, but further
+    // extension of this path must be implemented for the moment it'll fall
+    // through to the assert.
+    if (inputType->isPointerTy() || v->getType() == inputType->getPointerTo()) {
+      retVal = v;
+      return builder.saveIP();
+    }
+
+    assert(false && "Currently unsupported OMPTargetVarCaptureByCopy Type");
+    break;
+  }
+  case mlir::omp::VariableCaptureKind::ByRef: {
+    retVal = builder.CreateAlignedLoad(
+        v->getType(), v,
+        ompBuilder.M.getDataLayout().getPrefTypeAlign(v->getType()));
+    break;
+  }
+  case mlir::omp::VariableCaptureKind::This:
+  case mlir::omp::VariableCaptureKind::VLAType:
+    assert(false && "Currently unsupported capture kind");
+    break;
+  }
+
   return builder.saveIP();
 }
 
+// This is a variation on Clang's GenerateOpenMPCapturedVars, which
+// generates 
diff erent operation (e.g. load/store) combinations for
+// arguments to the kernel, based on map capture kinds which are then
+// utilised in the combinedInfo in place of the original Map value.
+static void
+createAlteredByCaptureMap(MapInfoData &mapData,
+                          LLVM::ModuleTranslation &moduleTranslation,
+                          llvm::IRBuilderBase &builder) {
+  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
+    // if it's declare target, skip it, it's handled seperately.
+    if (!mapData.IsDeclareTarget[i]) {
+      mlir::omp::VariableCaptureKind captureKind =
+          mlir::omp::VariableCaptureKind::ByRef;
+
+      if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
+              mapData.MapClause[i])) {
+        captureKind = mapOp.getMapCaptureType().value_or(
+            mlir::omp::VariableCaptureKind::ByRef);
+      }
+
+      switch (captureKind) {
+      case mlir::omp::VariableCaptureKind::ByRef: {
+        // Currently handles array sectioning lowerbound case, but more
+        // logic may be required in the future. Clang invokes EmitLValue,
+        // which has specialised logic for special Clang types such as user
+        // defines, so it is possible we will have to extend this for
+        // structures or other complex types. As the general idea is that this
+        // function mimics some of the logic from Clang that we require for
+        // kernel argument passing from host -> device.
+        if (auto mapOp = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
+                mapData.MapClause[i])) {
+          if (!mapOp.getBounds().empty() && mapData.BaseType[i]->isArrayTy()) {
+
+            std::vector<llvm::Value *> idx =
+                std::vector<llvm::Value *>{builder.getInt64(0)};
+            for (int i = mapOp.getBounds().size() - 1; i >= 0; --i) {
+              if (auto boundOp =
+                      mlir::dyn_cast_if_present<mlir::omp::DataBoundsOp>(
+                          mapOp.getBounds()[i].getDefiningOp())) {
+                idx.push_back(
+                    moduleTranslation.lookupValue(boundOp.getLowerBound()));
+              }
+            }
+
+            mapData.Pointers[i] = builder.CreateInBoundsGEP(
+                mapData.BaseType[i], mapData.Pointers[i], idx);
+          }
+        }
+      } break;
+      case mlir::omp::VariableCaptureKind::ByCopy: {
+        llvm::Type *type = mapData.BaseType[i];
+        llvm::Value *newV = builder.CreateLoad(type, mapData.Pointers[i]);
+
+        if (!type->isPointerTy()) {
+          auto curInsert = builder.saveIP();
+          builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation));
+          auto *memTempAlloc =
+              builder.CreateAlloca(builder.getInt8PtrTy(), nullptr, ".casted");
+          builder.restoreIP(curInsert);
+
+          builder.CreateStore(newV, memTempAlloc);
+          newV = builder.CreateLoad(builder.getInt8PtrTy(), memTempAlloc);
+        }
+
+        mapData.Pointers[i] = newV;
+        mapData.BasePointers[i] = newV;
+      } break;
+      case mlir::omp::VariableCaptureKind::This:
+      case mlir::omp::VariableCaptureKind::VLAType:
+        mapData.MapClause[i]->emitOpError("Unhandled capture kind");
+        break;
+      }
+    }
+  }
+}
+
 static LogicalResult
 convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
                  LLVM::ModuleTranslation &moduleTranslation) {
@@ -2062,26 +2319,6 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
   auto targetOp = cast<omp::TargetOp>(opInst);
   auto &targetRegion = targetOp.getRegion();
 
-  // This function filters out kernel data that will not show up as kernel
-  // input arguments to the generated kernel function but will still need
-  // explicitly mapped through supplying information to the OpenMP runtime
-  // (declare target). It also prepares some data used for generating the
-  // kernel and populating the associated OpenMP runtime data structures.
-  auto getKernelArguments =
-      [&](const llvm::SetVector<Value> &operandSet,
-          llvm::SmallVectorImpl<llvm::Value *> &llvmInputs) {
-        for (Value operand : operandSet) {
-          if (!getRefPtrIfDeclareTarget(operand, moduleTranslation))
-            llvmInputs.push_back(moduleTranslation.lookupValue(operand));
-        }
-      };
-
-  llvm::SetVector<Value> operandSet;
-  getUsedValuesDefinedAbove(targetRegion, operandSet);
-
-  llvm::SmallVector<llvm::Value *> inputs;
-  getKernelArguments(operandSet, inputs);
-
   LogicalResult bodyGenStatus = success();
 
   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
@@ -2115,31 +2352,32 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
   llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
       findAllocaInsertPoint(builder, moduleTranslation);
 
-  DataLayout DL = DataLayout(opInst.getParentOfType<ModuleOp>());
-  SmallVector<Value> mapOperands = targetOp.getMapOperands();
-
-  auto getMapTypes = [](mlir::OperandRange mapOperands,
-                        mlir::MLIRContext *ctx) {
-    SmallVector<mlir::Attribute> mapTypes;
-    for (auto mapValue : mapOperands) {
-      if (mapValue.getDefiningOp()) {
-        auto mapOp =
-            mlir::dyn_cast<mlir::omp::MapInfoOp>(mapValue.getDefiningOp());
-        mapTypes.push_back(mapOp.getMapTypeAttr());
-      }
-    }
-    return mlir::ArrayAttr::get(ctx, mapTypes);
-  };
-
-  ArrayAttr mapTypes =
-      getMapTypes(targetOp.getMapOperands(), targetOp->getContext());
+  DataLayout dl = DataLayout(opInst.getParentOfType<ModuleOp>());
+  llvm::SmallVector<Value> mapOperands = targetOp.getMapOperands();
+  MapInfoData mapData;
+  collectMapDataFromMapOperands(mapData, mapOperands, moduleTranslation, dl,
+                                builder);
+
+  // We wish to modify some of the methods in which kernel arguments are
+  // passed based on their capture type by the target region, this can
+  // involve generating new loads and stores, which changes the
+  // MLIR value to LLVM value mapping, however, we only wish to do this
+  // locally for the current function/target and also avoid altering
+  // ModuleTranslation, so we remap the base pointer or pointer stored
+  // in the map infos corresponding MapInfoData, which is later accessed
+  // by genMapInfos and createTarget to help generate the kernel and
+  // kernel arg structure. It primarily becomes relevant in cases like
+  // bycopy, or byref range'd arrays. In the default case, we simply
+  // pass thee pointer byref as both basePointer and pointer.
+  if (!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
+    createAlteredByCaptureMap(mapData, moduleTranslation, builder);
 
   llvm::OpenMPIRBuilder::MapInfosTy combinedInfos;
   auto genMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP)
       -> llvm::OpenMPIRBuilder::MapInfosTy & {
     builder.restoreIP(codeGenIP);
-    genMapInfos(builder, moduleTranslation, DL, combinedInfos, mapOperands,
-                mapTypes, {}, {}, true);
+    genMapInfos(builder, moduleTranslation, dl, combinedInfos, mapData, {}, {},
+                true);
     return combinedInfos;
   };
 
@@ -2158,22 +2396,27 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
       return codeGenIP;
     }
 
-    return createDeviceArgumentAccessor(arg, input, retVal, builder,
+    return createDeviceArgumentAccessor(mapData, arg, input, retVal, builder,
                                         *ompBuilder, moduleTranslation,
                                         allocaIP, codeGenIP);
   };
 
+  llvm::SmallVector<llvm::Value *, 4> kernelInput;
+  for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
+    // declare target arguments are not passed to kernels as arguments
+    if (!mapData.IsDeclareTarget[i])
+      kernelInput.push_back(mapData.OriginalValue[i]);
+  }
+
   builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTarget(
       ompLoc, allocaIP, builder.saveIP(), entryInfo, defaultValTeams,
-      defaultValThreads, inputs, genMapInfoCB, bodyCB, argAccessorCB));
+      defaultValThreads, kernelInput, genMapInfoCB, bodyCB, argAccessorCB));
 
   // Remap access operations to declare target reference pointers for the
   // device, essentially generating extra loadop's as necessary
-  if (moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice()) {
-    SmallVector<Value> mapOperands = targetOp.getMapOperands();
-    handleDeclareTargetMapVar(llvm::ArrayRef(mapOperands), moduleTranslation,
-                              builder);
-  }
+  if (moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice())
+    handleDeclareTargetMapVar(mapData, moduleTranslation, builder);
+
   return bodyGenStatus;
 }
 

diff  --git a/mlir/test/Target/LLVMIR/omptarget-array-sectioning-host.mlir b/mlir/test/Target/LLVMIR/omptarget-array-sectioning-host.mlir
new file mode 100644
index 000000000000000..056085123480baf
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-array-sectioning-host.mlir
@@ -0,0 +1,56 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// This test checks the offload sizes provided to the OpenMP kernel argument
+// structure are correct when lowering to LLVM-IR from MLIR with 3-D bounds 
+// provided for a 3-D array. One with full default size, and the other with 
+// a user specified OpenMP array sectioning. We expect the default sized 
+// array bounds to lower to the full size of the array and the sectioned 
+// array to be the size of 3*3*1*element-byte-size (36 bytes in this case).
+
+module attributes {omp.is_target_device = false} {
+  llvm.func @_3d_target_array_section() {
+    %0 = llvm.mlir.addressof @_QFEinarray : !llvm.ptr
+    %1 = llvm.mlir.addressof @_QFEoutarray : !llvm.ptr
+    %2 = llvm.mlir.constant(1 : index) : i64
+    %3 = llvm.mlir.constant(0 : index) : i64
+    %4 = llvm.mlir.constant(2 : index) : i64
+    %5 = omp.bounds   lower_bound(%3 : i64) upper_bound(%4 : i64) stride(%2 : i64) start_idx(%2 : i64)
+    %6 = omp.bounds   lower_bound(%2 : i64) upper_bound(%2 : i64) stride(%2 : i64) start_idx(%2 : i64)
+    %7 = omp.map_info var_ptr(%0 : !llvm.ptr, !llvm.array<3 x array<3 x array<3 x i32>>>)   map_clauses(tofrom) capture(ByRef) bounds(%5, %5, %6) -> !llvm.ptr {name = "inarray(1:3,1:3,2:2)"}
+    %8 = omp.map_info var_ptr(%1 : !llvm.ptr, !llvm.array<3 x array<3 x array<3 x i32>>>)   map_clauses(tofrom) capture(ByRef) bounds(%5, %5, %5) -> !llvm.ptr {name = "outarray(1:3,1:3,1:3)"}
+    omp.target   map_entries(%7, %8 : !llvm.ptr, !llvm.ptr) {
+      %9 = llvm.mlir.constant(0 : i64) : i64
+      %10 = llvm.mlir.constant(1 : i64) : i64
+      %11 = llvm.getelementptr %0[0, %10, %9, %9] : (!llvm.ptr, i64, i64, i64) -> !llvm.ptr, !llvm.array<3 x array<3 x array<3 x i32>>>
+      %12 = llvm.load %11 : !llvm.ptr -> i32
+      %13 = llvm.getelementptr %1[0, %10, %9, %9] : (!llvm.ptr, i64, i64, i64) -> !llvm.ptr, !llvm.array<3 x array<3 x array<3 x i32>>>
+      llvm.store %12, %13 : i32, !llvm.ptr
+      omp.terminator
+    }
+    llvm.return
+  }
+  llvm.mlir.global internal @_QFEinarray() {addr_space = 0 : i32} : !llvm.array<3 x array<3 x array<3 x i32>>> {
+    %0 = llvm.mlir.zero : !llvm.array<3 x array<3 x array<3 x i32>>>
+    llvm.return %0 : !llvm.array<3 x array<3 x array<3 x i32>>>
+  }
+  llvm.mlir.global internal @_QFEoutarray() {addr_space = 0 : i32} : !llvm.array<3 x array<3 x array<3 x i32>>> {
+    %0 = llvm.mlir.zero : !llvm.array<3 x array<3 x array<3 x i32>>>
+    llvm.return %0 : !llvm.array<3 x array<3 x array<3 x i32>>>
+  }
+}
+
+// CHECK: @.offload_sizes = private unnamed_addr constant [2 x i64] [i64 36, i64 108]
+// CHECK: @.offload_maptypes = private unnamed_addr constant [2 x i64] [i64 35, i64 35]
+// CHECKL: @.offload_mapnames = private constant [2 x ptr] [ptr @0, ptr @1]
+
+// CHECK: define void @_3d_target_array_section()
+
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK: store ptr @_QFEinarray, ptr %[[OFFLOADBASEPTRS]], align 8
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK: store ptr getelementptr inbounds ([3 x [3 x [3 x i32]]], ptr @_QFEinarray, i64 0, i64 1, i64 0, i64 0), ptr %[[OFFLOADPTRS]], align 8
+
+// CHECK: %[[OFFLOADBASEPTRS2:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK: store ptr @_QFEoutarray, ptr %[[OFFLOADBASEPTRS2]], align 8
+// CHECK: %[[OFFLOADPTRS2:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK: store ptr @_QFEoutarray, ptr %[[OFFLOADPTRS2]], align 8

diff  --git a/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir b/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir
new file mode 100644
index 000000000000000..cf6b7257ac606dc
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir
@@ -0,0 +1,41 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+module attributes {omp.is_target_device = true} {
+  llvm.func @_QQmain() attributes {fir.bindc_name = "main"} {
+    %0 = llvm.mlir.addressof @_QFEi : !llvm.ptr
+    %1 = llvm.mlir.addressof @_QFEsp : !llvm.ptr
+    %2 = omp.map_info var_ptr(%1 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "sp"}
+    %3 = omp.map_info var_ptr(%0 : !llvm.ptr, i32) map_clauses(to) capture(ByCopy) -> !llvm.ptr {name = "i"}
+    omp.target map_entries(%2, %3 : !llvm.ptr, !llvm.ptr) {
+      %4 = llvm.load %0 : !llvm.ptr -> i32
+      llvm.store %4, %1 : i32, !llvm.ptr
+      omp.terminator
+    }
+    llvm.return
+  }
+  llvm.mlir.global internal @_QFEi() {addr_space = 0 : i32} : i32 {
+    %0 = llvm.mlir.constant(1 : i32) : i32
+    llvm.return %0 : i32
+  }
+  llvm.mlir.global internal @_QFEsp() {addr_space = 0 : i32} : i32 {
+    %0 = llvm.mlir.constant(0 : i32) : i32
+    llvm.return %0 : i32
+  }
+}
+
+// CHECK: define {{.*}} void @__omp_offloading_{{.*}}_{{.*}}__QQmain_l{{.*}}(ptr %[[ARG_BYREF:.*]], ptr %[[ARG_BYCOPY:.*]]) {
+
+// CHECK: entry:
+// CHECK: %[[ALLOCA_BYREF:.*]] = alloca ptr, align 8
+// CHECK: store ptr %[[ARG_BYREF]], ptr %[[ALLOCA_BYREF]], align 8
+// CHECK: %[[ALLOCA_BYCOPY:.*]] = alloca ptr, align 8
+// CHECK: store ptr %[[ARG_BYCOPY]], ptr %[[ALLOCA_BYCOPY]], align 8
+
+// CHECK: user_code.entry:                                  ; preds = %entry
+// CHECK: %[[LOAD_BYREF:.*]] = load ptr, ptr %[[ALLOCA_BYREF]], align 8 
+// CHECK: br label %omp.target
+
+// CHECK: omp.target:                                       ; preds = %user_code.entry
+// CHECK:  %[[VAL_LOAD_BYCOPY:.*]] = load i32, ptr %[[ALLOCA_BYCOPY]], align 4
+// CHECK:  store i32 %[[VAL_LOAD_BYCOPY]], ptr %[[LOAD_BYREF]], align 4
+// CHECK: br label %omp.region.cont

diff  --git a/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir b/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir
new file mode 100644
index 000000000000000..ca5dad8b4fc9a85
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir
@@ -0,0 +1,42 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+module attributes {omp.is_target_device = false} {
+  llvm.func @_QQmain() attributes {fir.bindc_name = "main"} {
+    %0 = llvm.mlir.addressof @_QFEi : !llvm.ptr
+    %1 = llvm.mlir.addressof @_QFEsp : !llvm.ptr
+    %2 = omp.map_info var_ptr(%1 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "sp"}
+    %3 = omp.map_info var_ptr(%0 : !llvm.ptr, i32) map_clauses(to) capture(ByCopy) -> !llvm.ptr {name = "i"}
+    omp.target map_entries(%2, %3 : !llvm.ptr, !llvm.ptr) {
+      %4 = llvm.load %0 : !llvm.ptr -> i32
+      llvm.store %4, %1 : i32, !llvm.ptr
+      omp.terminator
+    }
+    llvm.return
+  }
+  llvm.mlir.global internal @_QFEi() {addr_space = 0 : i32} : i32 {
+    %0 = llvm.mlir.constant(1 : i32) : i32
+    llvm.return %0 : i32
+  }
+  llvm.mlir.global internal @_QFEsp() {addr_space = 0 : i32} : i32 {
+    %0 = llvm.mlir.constant(0 : i32) : i32
+    llvm.return %0 : i32
+  }
+}
+
+// CHECK: define void @_QQmain() {
+// CHECK: %[[BYCOPY_ALLOCA:.*]] = alloca ptr, align 8
+
+// CHECK: entry:                                            ; preds = %0
+// CHECK: %[[LOAD_VAL:.*]] = load i32, ptr @_QFEi, align 4
+// CHECK: store i32 %[[LOAD_VAL]], ptr %[[BYCOPY_ALLOCA]], align 4
+// CHECK: %[[BYCOPY_LOAD:.*]] = load ptr, ptr %[[BYCOPY_ALLOCA]], align 8
+
+// CHECK: %[[BASEPTR_BYREF:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK: store ptr @_QFEsp, ptr %[[BASEPTR_BYREF]], align 8
+// CHECK: %[[OFFLOADPTR_BYREF:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK: store ptr @_QFEsp, ptr %[[OFFLOADPTR_BYREF]], align 8
+
+// CHECK: %[[BASEPTR_BYCOPY:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK: store ptr %[[BYCOPY_LOAD]], ptr %[[BASEPTR_BYCOPY]], align 8
+// CHECK: %[[OFFLOADPTR_BYREF:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK: store ptr %[[BYCOPY_LOAD]], ptr %[[OFFLOADPTR_BYREF]], align 8

diff  --git a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir
index f2431ec87933f95..9221b410d766ed4 100644
--- a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir
@@ -38,15 +38,20 @@ llvm.func @_QPopenmp_target_data() {
 
 // -----
 
-llvm.func @_QPopenmp_target_data_region(%1 : !llvm.ptr) {
-  %2 = omp.map_info var_ptr(%1 : !llvm.ptr, !llvm.array<1024 x i32>)   map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""}
-  omp.target_data map_entries(%2 : !llvm.ptr) {
-    %3 = llvm.mlir.constant(99 : i32) : i32
-    %4 = llvm.mlir.constant(1 : i64) : i64
-    %5 = llvm.mlir.constant(1 : i64) : i64
-    %6 = llvm.mlir.constant(0 : i64) : i64
-    %7 = llvm.getelementptr %1[0, %6] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<1024 x i32>
-    llvm.store %3, %7 : i32, !llvm.ptr
+llvm.func @_QPopenmp_target_data_region(%0 : !llvm.ptr) {
+  %1 = llvm.mlir.constant(1023 : index) : i64
+  %2 = llvm.mlir.constant(0 : index) : i64
+  %3 = llvm.mlir.constant(1024 : index) : i64
+  %4 = llvm.mlir.constant(1 : index) : i64
+  %5 = omp.bounds   lower_bound(%2 : i64) upper_bound(%1 : i64) extent(%3 : i64) stride(%4 : i64) start_idx(%4 : i64)
+  %6 = omp.map_info var_ptr(%0 : !llvm.ptr, !llvm.array<1024 x i32>)   map_clauses(from) capture(ByRef) bounds(%5)  -> !llvm.ptr {name = ""}
+  omp.target_data map_entries(%6 : !llvm.ptr) {
+    %7 = llvm.mlir.constant(99 : i32) : i32
+    %8 = llvm.mlir.constant(1 : i64) : i64
+    %9 = llvm.mlir.constant(1 : i64) : i64
+    %10 = llvm.mlir.constant(0 : i64) : i64
+    %11 = llvm.getelementptr %0[0, %10] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<1024 x i32>
+    llvm.store %7, %11 : i32, !llvm.ptr
     omp.terminator
   }
   llvm.return
@@ -92,16 +97,36 @@ llvm.func @_QPomp_target_enter_exit(%1 : !llvm.ptr, %3 : !llvm.ptr) {
   %11 = llvm.mlir.constant(10 : i32) : i32
   %12 = llvm.icmp "slt" %10, %11 : i32
   %13 = llvm.load %5 : !llvm.ptr -> i32
-  %map1 = omp.map_info var_ptr(%1 : !llvm.ptr, !llvm.array<1024 x i32>)   map_clauses(to) capture(ByRef) -> !llvm.ptr {name = ""}
-  %map2 = omp.map_info var_ptr(%3 : !llvm.ptr, !llvm.array<512 x i32>)   map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> !llvm.ptr {name = ""}
+  %14 = llvm.mlir.constant(1023 : index) : i64
+  %15 = llvm.mlir.constant(0 : index) : i64
+  %16 = llvm.mlir.constant(1024 : index) : i64
+  %17 = llvm.mlir.constant(1 : index) : i64
+  %18 = omp.bounds   lower_bound(%15 : i64) upper_bound(%14 : i64) extent(%16 : i64) stride(%17 : i64) start_idx(%17 : i64)
+  %map1 = omp.map_info var_ptr(%1 : !llvm.ptr, !llvm.array<1024 x i32>)   map_clauses(to) capture(ByRef) bounds(%18) -> !llvm.ptr {name = ""}
+  %19 = llvm.mlir.constant(511 : index) : i64
+  %20 = llvm.mlir.constant(0 : index) : i64
+  %21 = llvm.mlir.constant(512 : index) : i64
+  %22 = llvm.mlir.constant(1 : index) : i64
+  %23 = omp.bounds   lower_bound(%20 : i64) upper_bound(%19 : i64) extent(%21 : i64) stride(%22 : i64) start_idx(%22 : i64)
+  %map2 = omp.map_info var_ptr(%3 : !llvm.ptr, !llvm.array<512 x i32>)   map_clauses(exit_release_or_enter_alloc) capture(ByRef) bounds(%23) -> !llvm.ptr {name = ""}
   omp.target_enter_data   if(%12 : i1) device(%13 : i32) map_entries(%map1, %map2 : !llvm.ptr, !llvm.ptr)
-  %14 = llvm.load %7 : !llvm.ptr -> i32
-  %15 = llvm.mlir.constant(10 : i32) : i32
-  %16 = llvm.icmp "sgt" %14, %15 : i32
-  %17 = llvm.load %5 : !llvm.ptr -> i32
-  %map3 = omp.map_info var_ptr(%1 : !llvm.ptr, !llvm.array<1024 x i32>)   map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""}
-  %map4 = omp.map_info var_ptr(%3 : !llvm.ptr, !llvm.array<512 x i32>)   map_clauses(exit_release_or_enter_alloc) capture(ByRef) -> !llvm.ptr {name = ""}
-  omp.target_exit_data   if(%16 : i1) device(%17 : i32) map_entries(%map3, %map4 : !llvm.ptr, !llvm.ptr)
+  %24 = llvm.load %7 : !llvm.ptr -> i32
+  %25 = llvm.mlir.constant(10 : i32) : i32
+  %26 = llvm.icmp "sgt" %24, %25 : i32
+  %27 = llvm.load %5 : !llvm.ptr -> i32
+  %28 = llvm.mlir.constant(1023 : index) : i64
+  %29 = llvm.mlir.constant(0 : index) : i64
+  %30 = llvm.mlir.constant(1024 : index) : i64
+  %31 = llvm.mlir.constant(1 : index) : i64
+  %32 = omp.bounds   lower_bound(%29 : i64) upper_bound(%28 : i64) extent(%30 : i64) stride(%31 : i64) start_idx(%31 : i64)
+  %map3 = omp.map_info var_ptr(%1 : !llvm.ptr, !llvm.array<1024 x i32>)   map_clauses(from) capture(ByRef) bounds(%32) -> !llvm.ptr {name = ""}
+  %33 = llvm.mlir.constant(511 : index) : i64
+  %34 = llvm.mlir.constant(0 : index) : i64
+  %35 = llvm.mlir.constant(512 : index) : i64
+  %36 = llvm.mlir.constant(1 : index) : i64
+  %37 = omp.bounds   lower_bound(%34 : i64) upper_bound(%33 : i64) extent(%35 : i64) stride(%36 : i64) start_idx(%36 : i64)
+  %map4 = omp.map_info var_ptr(%3 : !llvm.ptr, !llvm.array<512 x i32>)   map_clauses(exit_release_or_enter_alloc) capture(ByRef) bounds(%37) -> !llvm.ptr {name = ""}
+  omp.target_exit_data   if(%26 : i1) device(%27 : i32) map_entries(%map3, %map4 : !llvm.ptr, !llvm.ptr)
   llvm.return
 }
 

diff  --git a/mlir/test/Target/LLVMIR/omptarget-region-parallel-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-region-parallel-llvm.mlir
index 20ad6d30c2f52ef..1d8799ecd446f03 100644
--- a/mlir/test/Target/LLVMIR/omptarget-region-parallel-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-region-parallel-llvm.mlir
@@ -38,7 +38,7 @@ module attributes {omp.is_target_device = false} {
 // CHECK: store ptr %[[ADDR_B]], ptr %[[GEP2]], align 8
 // CHECK: %[[GEP3:.*]] = getelementptr { ptr, ptr, ptr }, ptr %[[STRUCTARG]], i32 0, i32 2
 // CHECK: store ptr %[[ADDR_C]], ptr %[[GEP3]], align 8
-// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 1, ptr @__omp_offloading_[[DEV]]_[[FIL]]_omp_target_region__l[[LINE]]..omp_par, ptr %[[STRUCTARG]])
+// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @{{.*}}, i32 1, ptr @__omp_offloading_[[DEV]]_[[FIL]]_omp_target_region__l[[LINE]]..omp_par, ptr %[[STRUCTARG]])
 
 
 // CHECK: define internal void @__omp_offloading_[[DEV]]_[[FIL]]_omp_target_region__l[[LINE]]..omp_par(ptr noalias %tid.addr, ptr noalias %zero.addr, ptr %[[STRUCTARG2:.*]]) #0 {

diff  --git a/openmp/libomptarget/test/offloading/fortran/basic-target-region-1D-array-section.f90 b/openmp/libomptarget/test/offloading/fortran/basic-target-region-1D-array-section.f90
new file mode 100644
index 000000000000000..11d3b6936bcea2e
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/fortran/basic-target-region-1D-array-section.f90
@@ -0,0 +1,27 @@
+! Basic offloading test of arrays with provided lower 
+! and upper bounds as specified by OpenMP's sectioning
+! REQUIRES: flang, amdgcn-amd-amdhsa
+! UNSUPPORTED: nvptx64-nvidia-cuda
+! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+! UNSUPPORTED: aarch64-unknown-linux-gnu
+! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+! UNSUPPORTED: x86_64-pc-linux-gnu
+! UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+program main
+    implicit none
+    integer :: write_arr(10) =  (/0,0,0,0,0,0,0,0,0,0/)
+    integer :: read_arr(10) = (/1,2,3,4,5,6,7,8,9,10/)
+    integer :: i = 2
+
+    !$omp target map(to:read_arr(2:5)) map(from:write_arr(2:5)) map(tofrom:i)
+        do i = 2, 5
+            write_arr(i) = read_arr(i)
+        end do
+    !$omp end target
+    
+    print *, write_arr(:)
+end program
+
+! CHECK: 0 2 3 4 5 0 0 0 0 0

diff  --git a/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array-section.f90 b/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array-section.f90
new file mode 100644
index 000000000000000..28b2afced4d1bc6
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array-section.f90
@@ -0,0 +1,39 @@
+! Basic offloading test of a regular array explicitly
+! passed within a target region
+! REQUIRES: flang, amdgcn-amd-amdhsa
+! UNSUPPORTED: nvptx64-nvidia-cuda
+! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+! UNSUPPORTED: aarch64-unknown-linux-gnu
+! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+! UNSUPPORTED: x86_64-pc-linux-gnu
+! UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+program main
+    implicit none
+    integer :: inArray(3,3,3)
+    integer :: outArray(3,3,3)
+    integer :: i, j, k 
+
+    do i = 1, 3
+      do j = 1, 3
+        do k = 1, 3
+            inArray(i, j, k) = 42
+            outArray(i, j, k) = 0
+        end do
+       end do
+    end do
+
+!$omp target map(tofrom:inArray(1:3, 1:3, 2:2), outArray(1:3, 1:3, 1:3), j, k)
+    do j = 1, 3
+      do k = 1, 3
+        outArray(k, j, 2) = inArray(k, j, 2)
+      end do
+    end do
+!$omp end target
+
+ print *, outArray
+
+end program
+
+! CHECK:  0 0 0 0 0 0 0 0 0 42 42 42 42 42 42 42 42 42 0 0 0 0 0 0 0 0 0

diff  --git a/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array.f90 b/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array.f90
new file mode 100644
index 000000000000000..58f42138ad0affb
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array.f90
@@ -0,0 +1,45 @@
+! Basic offloading test of a regular array explicitly
+! passed within a target region
+! REQUIRES: flang, amdgcn-amd-amdhsa
+! UNSUPPORTED: nvptx64-nvidia-cuda
+! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+! UNSUPPORTED: aarch64-unknown-linux-gnu
+! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+! UNSUPPORTED: x86_64-pc-linux-gnu
+! UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+program main
+    implicit none
+    integer :: x(2,2,2)
+    integer :: i = 1, j = 1, k = 1
+    integer :: counter = 1
+    do i = 1, 2
+        do j = 1, 2
+          do k = 1, 2
+            x(i, j, k) = 0
+          end do
+        end do
+    end do
+
+!$omp target map(tofrom:x, i, j, k, counter)
+    do i = 1, 2
+        do j = 1, 2
+          do k = 1, 2
+            x(i, j, k) = counter
+            counter = counter + 1
+          end do
+        end do
+    end do
+!$omp end target
+
+     do i = 1, 2
+        do j = 1, 2
+          do k = 1, 2
+            print *, x(i, j, k)
+          end do
+        end do
+    end do
+end program main
+  
+! CHECK: 1 2 3 4 5 6 7 8

diff  --git a/openmp/libomptarget/test/offloading/fortran/basic-target-region-array.f90 b/openmp/libomptarget/test/offloading/fortran/basic-target-region-array.f90
new file mode 100644
index 000000000000000..d3c799ff3334f4d
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/fortran/basic-target-region-array.f90
@@ -0,0 +1,27 @@
+! Basic offloading test of a regular array explicitly
+! passed within a target region
+! REQUIRES: flang, amdgcn-amd-amdhsa
+! UNSUPPORTED: nvptx64-nvidia-cuda
+! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+! UNSUPPORTED: aarch64-unknown-linux-gnu
+! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+! UNSUPPORTED: x86_64-pc-linux-gnu
+! UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+program main
+    integer :: x(10) = (/0,0,0,0,0,0,0,0,0,0/)
+    integer :: i = 1
+    integer :: j = 11
+
+  !$omp target map(tofrom:x, i, j)
+     do while (i <= j)
+        x(i) = i;
+        i = i + 1
+    end do
+  !$omp end target
+
+   PRINT *, x(:)
+end program main
+  
+! CHECK: 1 2 3 4 5 6 7 8 9 10