[llvm-branch-commits] [flang] [llvm] [mlir] [OpenMP][MLIR] Modify OpenMP dialect lowering to support attach mapping (PR #179023)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Sat Jan 31 01:03:51 PST 2026


https://github.com/agozillon created https://github.com/llvm/llvm-project/pull/179023

This PR adjusts the LLVM-IR lowering to support the new attach map type that the runtime uses to link data and pointers together. This replaces the older OMP_MAP_PTR_AND_OBJ map type in most cases and enables slightly more complicated ref_ptr/ptee and attach semantics.
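
For readers decoding the updated @.offload_maptypes constants in the tests below: the values are bitmasks of the runtime's OpenMPOffloadMappingFlags. A minimal standalone decoder is sketched here; the TO/FROM/TARGET_PARAM/IMPLICIT bit values are the long-standing runtime ones, while the 0x4000 value for the attach bit is an assumption inferred from the updated CHECK lines, not confirmed elsewhere.

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Bit values assumed from OpenMPOffloadMappingFlags; 0x4000 for ATTACH
      // is inferred from the tests in this patch.
      struct Flag { uint64_t bit; const char *name; };
      const Flag flags[] = {{0x1, "TO"},
                            {0x2, "FROM"},
                            {0x20, "TARGET_PARAM"},
                            {0x200, "IMPLICIT"},
                            {0x4000, "ATTACH"}};
      for (uint64_t v : {32ull, 3ull, 515ull, 16384ull}) {
        std::printf("%5llu:", (unsigned long long)v);
        for (const Flag &f : flags)
          if (v & f.bit)
            std::printf(" %s", f.name);
        std::printf("\n"); // e.g. 515 -> TO FROM IMPLICIT; 16384 -> ATTACH
      }
    }

Read this way, an implicitly mapped Fortran pointer now lowers to a data entry (515 = TO|FROM|IMPLICIT) followed by a bare attach entry (16384), instead of the old PTR_AND_OBJ entry.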

>From 7851c81db2d5cfff2caa4d31c23891a8c6ae76d3 Mon Sep 17 00:00:00 2001
From: agozillon <Andrew.Gozillon at amd.com>
Date: Mon, 26 Jan 2026 11:15:19 -0600
Subject: [PATCH] [OpenMP][MLIR] Modify OpenMP dialect lowering to support
 attach mapping

This PR adjusts the LLVM-IR lowering to support the new attach map type that the runtime
uses to link data and pointers together. This replaces the older OMP_MAP_PTR_AND_OBJ map
type in most cases and enables slightly more complicated ref_ptr/ptee and attach semantics.
---
 .../OpenMP/map-types-and-sizes.f90            | 211 ++++---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 516 ++++++++++--------
 .../allocatable_gpu_reduction_teams.mlir      |   3 +-
 .../omptarget-data-use-dev-ordering.mlir      |  15 +-
 .../LLVMIR/omptarget-host-ref-semantics.mlir  | 364 ++++++++++++
 ...t-nested-ptr-record-type-mapping-host.mlir |  23 +-
 ...arget-nested-record-type-mapping-host.mlir |   4 +-
 mlir/test/Target/LLVMIR/omptarget-nowait.mlir |  26 +-
 .../omptarget-record-type-mapping-host.mlir   |   4 +-
 ...rget-record-type-with-ptr-member-host.mlir |  80 ++-
 .../descriptor-stack-jam-regression.f90       |   3 +-
 .../offloading/fortran/map_attach_always.f90  |  70 +++
 .../offloading/fortran/map_attach_never.f90   |  55 ++
 .../fortran/map_ref_ptr_ptee_test_1.f90       |  48 ++
 .../fortran/map_ref_ptr_ptee_test_2.f90       |  47 ++
 ...ap-pointer-to-dtype-allocatable-member.f90 |   3 +-
 16 files changed, 1047 insertions(+), 425 deletions(-)
 create mode 100644 mlir/test/Target/LLVMIR/omptarget-host-ref-semantics.mlir
 create mode 100644 offload/test/offloading/fortran/map_attach_always.f90
 create mode 100644 offload/test/offloading/fortran/map_attach_never.f90
 create mode 100644 offload/test/offloading/fortran/map_ref_ptr_ptee_test_1.f90
 create mode 100644 offload/test/offloading/fortran/map_ref_ptr_ptee_test_2.f90

diff --git a/flang/test/Integration/OpenMP/map-types-and-sizes.f90 b/flang/test/Integration/OpenMP/map-types-and-sizes.f90
index d6d93985d9895..98744da793fa0 100644
--- a/flang/test/Integration/OpenMP/map-types-and-sizes.f90
+++ b/flang/test/Integration/OpenMP/map-types-and-sizes.f90
@@ -42,8 +42,7 @@ subroutine mapType_is_device_ptr
   !$omp end target
 end subroutine mapType_is_device_ptr
 
-!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [5 x i64] [i64 0, i64 0, i64 0, i64 8, i64 0]
-!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976711173, i64 281474976711173, i64 281474976711171, i64 281474976711187]
+!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976711173, i64 281474976711173, i64 515, i64 16384]
 subroutine mapType_ptr
   integer, pointer :: a
   !$omp target
@@ -82,8 +81,7 @@ subroutine map_ompx_hold
 !$omp end target data
 end subroutine
 
-!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [5 x i64] [i64 0, i64 0, i64 0, i64 8, i64 0]
-!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976711173, i64 281474976711173, i64 281474976711171, i64 281474976711187]
+!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976711173, i64 281474976711173, i64 515, i64 16384]
 subroutine mapType_allocatable
   integer, allocatable :: a
   allocate(a)
@@ -93,8 +91,7 @@ subroutine mapType_allocatable
   deallocate(a)
 end subroutine mapType_allocatable
 
-!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [5 x i64] [i64 0, i64 0, i64 0, i64 8, i64 0]
-!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976710661, i64 281474976710661, i64 281474976710659, i64 281474976710675]
+!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976710661, i64 281474976710661, i64 3, i64 16384]
 subroutine mapType_ptr_explicit
   integer, pointer :: a
   !$omp target map(tofrom: a)
@@ -102,8 +99,7 @@ subroutine mapType_ptr_explicit
   !$omp end target
 end subroutine mapType_ptr_explicit
 
-!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [5 x i64] [i64 0, i64 0, i64 0, i64 8, i64 0]
-!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976710661, i64 281474976710661, i64 281474976710659, i64 281474976710675]
+!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976710661, i64 281474976710661, i64 3, i64 16384]
 subroutine mapType_allocatable_explicit
   integer, allocatable :: a
   allocate(a)
@@ -254,8 +250,8 @@ subroutine mapType_derived_explicit_nested_member_with_bounds
 !$omp end target
 end subroutine mapType_derived_explicit_nested_member_with_bounds
 
-!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [4 x i64] [i64 0, i64 48, i64 8, i64 0]
-!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710661, i64 281474976710659, i64 281474976710675]
+!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [4 x i64] [i64 0, i64 48, i64 0, i64 0]
+!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710661, i64 3, i64 16384]
 subroutine mapType_derived_type_alloca()
   type :: one_layer
   real(4) :: i
@@ -275,8 +271,8 @@ subroutine mapType_derived_type_alloca()
   !$omp end target
 end subroutine
 
-!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [9 x i64] [i64 0, i64 0, i64 0, i64 8, i64 0, i64 48, i64 8, i64 0, i64 4]
-!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [9 x i64] [i64 32, i64 281474976710661, i64 281474976710661, i64 281474976710659, i64 281474976710675, i64 281474976710661, i64 281474976710659, i64 281474976710675, i64 281474976710659]
+!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [9 x i64] [i64 0, i64 0, i64 0, i64 0, i64 48, i64 0, i64 4, i64 0, i64 0]
+!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [9 x i64] [i64 32, i64 281474976710661, i64 281474976710661, i64 0, i64 281474976710661, i64 3, i64 281474976710659, i64 16384, i64 16384]
 subroutine mapType_alloca_derived_type()
   type :: one_layer
   real(4) :: i
@@ -298,8 +294,8 @@ subroutine mapType_alloca_derived_type()
   !$omp end target
 end subroutine
 
-!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [9 x i64] [i64 0, i64 0, i64 0, i64 8, i64 0, i64 48, i64 8, i64 0, i64 4]
-!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [9 x i64] [i64 32, i64 281474976710661, i64 281474976710661, i64 281474976710659, i64 281474976710675, i64 281474976710661, i64 281474976710659, i64 281474976710675, i64 281474976710659]
+!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [9 x i64] [i64 0, i64 0, i64 0, i64 0, i64 48, i64 0, i64 4, i64 0, i64 0]
+!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [9 x i64] [i64 32, i64 281474976710661, i64 281474976710661, i64 0, i64 281474976710661, i64 3, i64 281474976710659, i64 16384, i64 16384]
 subroutine mapType_alloca_nested_derived_type()
   type :: middle_layer
   real(4) :: i
@@ -329,8 +325,8 @@ subroutine mapType_alloca_nested_derived_type()
   !$omp end target
 end subroutine
 
-!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [4 x i64] [i64 0, i64 48, i64 8, i64 0]
-!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710661, i64 281474976710659, i64 281474976710675]
+!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [4 x i64] [i64 0, i64 48, i64 0, i64 0]
+!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710661, i64 3, i64 16384]
 subroutine mapType_nested_derived_type_alloca()
   type :: middle_layer
   real(4) :: i
@@ -358,8 +354,8 @@ subroutine mapType_nested_derived_type_alloca()
   !$omp end target
 end subroutine
 
-!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [7 x i64] [i64 0, i64 64, i64 8, i64 0, i64 48, i64 8, i64 0]
-!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [7 x i64] [i64 32, i64 281474976710661, i64 281474976710656, i64 281474976710672, i64 281474976710661, i64 281474976710659, i64 281474976710675]
+!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [7 x i64] [i64 0, i64 64, i64 0, i64 48, i64 0, i64 0, i64 0]
+!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [7 x i64] [i64 32, i64 281474976710661, i64 0, i64 281474976710661, i64 3, i64 16384, i64 16384]
 subroutine mapType_nested_derived_type_member_idx()
 type :: vertexes
   integer :: test
@@ -564,13 +560,14 @@ end subroutine mapType_common_block_members
 !CHECK: %[[DESC_BASE_ADDR_DATA_SIZE:.*]] = mul i64 %[[MEMBER_BASE_ADDR_SIZE]], 4
 !CHECK: %[[LOAD_ADDR_DATA:.*]] = load ptr, ptr %[[MEMBER_DESCRIPTOR_BASE_ADDR]], align 8
 !CHECK: %[[GEP_ADDR_DATA:.*]] = getelementptr inbounds i32, ptr %[[LOAD_ADDR_DATA]], i64 0
+!CHECK: %[[LOAD_ADDR_DATA2:.*]] = load ptr, ptr %[[MEMBER_DESCRIPTOR_BASE_ADDR]], align 8
+!CHECK: %[[GEP_ADDR_DATA2:.*]] = getelementptr inbounds i32, ptr %[[LOAD_ADDR_DATA2]], i64 0
 !CHECK: %[[MEMBER_ACCESS_ADDR_END:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[MEMBER_ACCESS]], i64 1
 !CHECK: %[[MEMBER_ACCESS_ADDR_INT:.*]] = ptrtoint ptr %[[MEMBER_ACCESS_ADDR_END]] to i64
 !CHECK: %[[MEMBER_ACCESS_ADDR_BEGIN:.*]] = ptrtoint ptr %[[MEMBER_ACCESS]] to i64
 !CHECK: %[[DTYPE_SEGMENT_SIZE:.*]] = sub i64 %[[MEMBER_ACCESS_ADDR_INT]], %[[MEMBER_ACCESS_ADDR_BEGIN]]
 !CHECK: %[[DTYPE_SIZE_CALC:.*]] = sdiv exact i64 %[[DTYPE_SEGMENT_SIZE]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
 !CHECK: %[[DTYPE_CMP:.*]] = icmp eq ptr %[[GEP_ADDR_DATA]], null
-!CHECK: %[[DTYPE_SEL:.*]] = select i1 %[[DTYPE_CMP]], i64 0, i64 %[[DESC_BASE_ADDR_DATA_SIZE]]
 !CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
 !CHECK: store ptr %[[ALLOCA]], ptr %[[BASE_PTR_ARR]], align 8
 !CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 0
@@ -584,14 +581,11 @@ end subroutine mapType_common_block_members
 !CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
 !CHECK: store ptr %[[ALLOCA]], ptr %[[BASE_PTR_ARR]], align 8
 !CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 2
-!CHECK: store ptr %[[MEMBER_DESCRIPTOR_BASE_ADDR]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: store ptr %[[GEP_ADDR_DATA2]], ptr %[[OFFLOAD_PTR_ARR]], align 8
 !CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
-!CHECK: store ptr %[[MEMBER_DESCRIPTOR_BASE_ADDR]], ptr %[[BASE_PTR_ARR]], align 8
+!CHECK: store ptr %[[MEMBER_ACCESS]], ptr %[[BASE_PTR_ARR]], align 8
 !CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 3
 !CHECK: store ptr %array_offset, ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 3
-!CHECK: store i64 %[[DTYPE_SEL]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
-
 
 !CHECK-LABEL: define {{.*}} @{{.*}}maptype_alloca_derived_type_{{.*}}
 !CHECK: %{{.*}} = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, align 8
@@ -612,7 +606,6 @@ end subroutine mapType_common_block_members
 !CHECK: %[[DTYPE_BASE_ADDR_LOAD:.*]] = load ptr, ptr %[[DTYPE_BASE_ADDR_ACCESS]], align 8
 !CHECK: %[[DTYPE_ALLOCA_MEMBER_ACCESS:.*]] = getelementptr %_QFmaptype_alloca_derived_typeTone_layer, ptr %[[DTYPE_BASE_ADDR_LOAD]], i32 0, i32 4
 !CHECK: %[[DTYPE_ALLOCA_MEMBER_BASE_ADDR_ACCESS:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[DTYPE_ALLOCA_MEMBER_ACCESS]], i32 0, i32 0
-
 !CHECK: %[[DTYPE_BASE_ADDR_ACCESS_2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[DTYPE_DESC_ALLOCA]], i32 0, i32 0
 !CHECK: %[[DTYPE_BASE_ADDR_LOAD_2:.*]] = load ptr, ptr %[[DTYPE_BASE_ADDR_ACCESS_2]], align 8
 !CHECK: %[[DTYPE_NONALLOCA_MEMBER_ACCESS:.*]] = getelementptr %_QFmaptype_alloca_derived_typeTone_layer, ptr %[[DTYPE_BASE_ADDR_LOAD_2]], i32 0, i32 5
@@ -622,63 +615,57 @@ end subroutine mapType_common_block_members
 !CHECK: %[[MEMBER_SIZE_CALC_3:.*]] = mul i64 1, %[[MEMBER_SIZE_CALC_2]]
 !CHECK: %[[MEMBER_SIZE_CALC_4:.*]] = mul i64 %[[MEMBER_SIZE_CALC_3]], 4
 !CHECK: %[[DTYPE_BASE_ADDR_LOAD_3:.*]] = load ptr, ptr %[[DTYPE_BASE_ADDR_ACCESS_3]], align 8
+!CHECK: %[[DTYPE_BASE_ADDR_LOAD_3_1:.*]] = load ptr, ptr %[[DTYPE_BASE_ADDR_ACCESS_3]], align 8
 !CHECK: %[[LOAD_DTYPE_DESC_MEMBER:.*]] = load ptr, ptr %[[DTYPE_ALLOCA_MEMBER_BASE_ADDR_ACCESS]], align 8
 !CHECK: %[[MEMBER_ARRAY_OFFSET:.*]] = getelementptr inbounds i32, ptr %[[LOAD_DTYPE_DESC_MEMBER]], i64 0
-!CHECK: %[[DTYPE_END_OFFSET:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[DTYPE_DESC_ALLOCA_3]], i32 1
-!CHECK: %[[DTYPE_END:.*]] = ptrtoint ptr %[[DTYPE_END_OFFSET]] to i64
-!CHECK: %[[DTYPE_BEGIN:.*]] = ptrtoint ptr %[[DTYPE_DESC_ALLOCA_3]] to i64
-!CHECK: %[[DTYPE_DESC_SZ_CALC:.*]] = sub i64 %[[DTYPE_END]], %[[DTYPE_BEGIN]]
-!CHECK: %[[DTYPE_DESC_SZ:.*]] = sdiv exact i64 %[[DTYPE_DESC_SZ_CALC]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-!CHECK: %[[DTYPE_BASE_ADDR_ACCESS_4:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[DTYPE_DESC_ALLOCA_3]], i32 1
-!CHECK: %[[DTYPE_BASE_ADDR_ACCESS_3_OFF:.*]] = getelementptr ptr, ptr %[[DTYPE_BASE_ADDR_ACCESS_3]], i32 1
-!CHECK: %[[SIZE_2_CALC_1:.*]] = ptrtoint ptr %[[DTYPE_BASE_ADDR_ACCESS_4]] to i64
-!CHECK: %[[SIZE_2_CALC_2:.*]] = ptrtoint ptr %[[DTYPE_BASE_ADDR_ACCESS_3_OFF]] to i64
-!CHECK: %[[SIZE_2_CALC_3:.*]] = sub i64 %[[SIZE_2_CALC_1]], %[[SIZE_2_CALC_2]]
-!CHECK: %[[SIZE_2_CALC_4:.*]] = sdiv exact i64 %[[SIZE_2_CALC_3]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-!CHECK: %[[CMP_NULL:.*]] = icmp eq ptr %[[MEMBER_ARRAY_OFFSET]], null
-!CHECK: %[[NULL_SEL:.*]] = select i1 %[[CMP_NULL]], i64 0, i64 %[[MEMBER_SIZE_CALC_4]]
-
+!CHECK: %[[SIZE_CALC_1:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[DTYPE_DESC_ALLOCA_3]], i32 1
+!CHECK: %[[SIZE_CALC_2:.*]] = ptrtoint ptr %[[SIZE_CALC_1]] to i64
+!CHECK: %[[SIZE_CALC_3:.*]] = ptrtoint ptr %[[DTYPE_DESC_ALLOCA_3]] to i64
+!CHECK: %[[SIZE_CALC_4:.*]] = sub i64 %[[SIZE_CALC_2]], %[[SIZE_CALC_3]]
+!CHECK: %[[SIZE_CALC_5:.*]] = sdiv exact i64 %[[SIZE_CALC_4]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+!CHECK: %[[SIZE_CALC_6:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[DTYPE_DESC_ALLOCA_3]], i32 1
+!CHECK: %[[SIZE_CALC_7:.*]] = ptrtoint ptr %[[DTYPE_BASE_ADDR_ACCESS_3]] to i64
+!CHECK: %[[SIZE_CALC_8:.*]] = ptrtoint ptr %[[DTYPE_DESC_ALLOCA_3]] to i64
+!CHECK: %[[SIZE_CALC_9:.*]] = sub i64 %[[SIZE_CALC_7]], %[[SIZE_CALC_8]]
+!CHECK: %[[CALC_SIZE_1:.*]] = sdiv exact i64 %[[SIZE_CALC_9]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+!CHECK: %[[OFFLOAD_PTR:.*]] = getelementptr ptr, ptr %[[DTYPE_BASE_ADDR_ACCESS_3]], i32 1
+!CHECK: %[[SIZE_CALC_10:.*]] = ptrtoint ptr %[[SIZE_CALC_6]] to i64
+!CHECK: %[[SIZE_CALC_11:.*]] = ptrtoint ptr %[[OFFLOAD_PTR]] to i64
+!CHECK: %[[SIZE_CALC_12:.*]] = sub i64 %[[SIZE_CALC_10]], %[[SIZE_CALC_11]]
+!CHECK: %[[SIZE_CALC_13:.*]] = sdiv exact i64 %[[SIZE_CALC_12]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+!CHECK: %[[NULL_CMP:.*]] = icmp eq ptr %[[DTYPE_BASE_ADDR_LOAD_3_1]], null
+!CHECK: %[[SEL_SZ:.*]] = select i1 %[[NULL_CMP]], i64 0, i64 136
+!CHECK: %[[NULL_CMP:.*]] = icmp eq ptr %array_offset1, null
+!CHECK: %[[SEL_SZ2:.*]] = select i1 %[[NULL_CMP]], i64 0, i64 %[[MEMBER_SIZE_CALC_4]]
 !CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
 !CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
 !CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 0
 !CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [9 x i64], ptr %.offload_sizes, i32 0, i32 0
-!CHECK: store i64 %[[DTYPE_DESC_SZ]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
 !CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
 !CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
 !CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 1
 !CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [9 x i64], ptr %.offload_sizes, i32 0, i32 1
+!CHECK: store i64 %[[CALC_SIZE_1]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
 !CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
 !CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
 !CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 2
-!CHECK: store ptr %[[DTYPE_BASE_ADDR_ACCESS_3_OFF]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [9 x i64], ptr %.offload_sizes, i32 0, i32 2
-!CHECK: store i64 %[[SIZE_2_CALC_4]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
+!CHECK: store ptr %[[OFFLOAD_PTR]], ptr %[[OFFLOAD_PTR_ARR]], align 8
 !CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
 !CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
 !CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 3
-!CHECK: store ptr %[[DTYPE_BASE_ADDR_ACCESS_3]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: store ptr %[[DTYPE_BASE_ADDR_LOAD_3_1]], ptr %[[OFFLOAD_PTR_ARR]], align 8
 !CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 4
-!CHECK: store ptr %[[DTYPE_BASE_ADDR_ACCESS_3]], ptr %[[BASE_PTR_ARR]], align 8
+!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
 !CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 4
-!CHECK: store ptr %[[DTYPE_BASE_ADDR_LOAD_3]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: store ptr %[[DTYPE_ALLOCA_MEMBER_ACCESS]], ptr %[[OFFLOAD_PTR_ARR]], align 8
 !CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 5
 !CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
 !CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 5
-!CHECK: store ptr %[[DTYPE_ALLOCA_MEMBER_ACCESS]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: store ptr %[[MEMBER_ARRAY_OFFSET]], ptr %[[OFFLOAD_PTR_ARR]], align 8
 !CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 6
 !CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
 !CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 6
-!CHECK: store ptr %[[DTYPE_ALLOCA_MEMBER_BASE_ADDR_ACCESS]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 7
-!CHECK: store ptr %[[DTYPE_ALLOCA_MEMBER_BASE_ADDR_ACCESS]], ptr %[[BASE_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 7
-!CHECK: store ptr %[[MEMBER_ARRAY_OFFSET]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [9 x i64], ptr %.offload_sizes, i32 0, i32 7
-!CHECK: store i64 %[[NULL_SEL]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
-!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 8
-!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 8
 !CHECK: store ptr %[[DTYPE_NONALLOCA_MEMBER_ACCESS]], ptr %[[OFFLOAD_PTR_ARR]], align 8
 
 !CHECK-LABEL: define {{.*}} @{{.*}}maptype_alloca_nested_derived_type{{.*}}
@@ -705,62 +692,57 @@ end subroutine mapType_common_block_members
 !CHECK: %[[ALLOCATABLE_MEMBER_SIZE_CALC_4:.*]] = mul i64 1, %[[ALLOCATABLE_MEMBER_SIZE_CALC_3]]
 !CHECK: %[[ALLOCATABLE_MEMBER_SIZE_CALC_5:.*]] = mul i64 %[[ALLOCATABLE_MEMBER_SIZE_CALC_4]], 4
 !CHECK: %[[LOAD_BASE_ADDR:.*]] = load ptr, ptr %[[DTYPE_DESC_BASE_ADDR]], align 8
+!CHECK: %[[LOAD_BASE_ADDR2:.*]] = load ptr, ptr %[[DTYPE_DESC_BASE_ADDR]], align 8
 !CHECK: %[[LOAD_DESC_MEMBER_BASE_ADDR:.*]] = load ptr, ptr %[[MAPPED_MEMBER_BASE_ADDR_ACCESS]], align 8
 !CHECK: %[[ARRAY_OFFSET:.*]] = getelementptr inbounds i32, ptr %[[LOAD_DESC_MEMBER_BASE_ADDR]], i64 0
-!CHECK: %[[DTYPE_DESC_SIZE_CALC_1:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[DTYPE_DESC_ALLOCA_3]], i32 1
-!CHECK: %[[DTYPE_DESC_SIZE_CALC_2:.*]] = ptrtoint ptr %[[DTYPE_DESC_SIZE_CALC_1]] to i64
-!CHECK: %[[DTYPE_DESC_SIZE_CALC_3:.*]] = ptrtoint ptr %[[DTYPE_DESC_ALLOCA_3]] to i64
-!CHECK: %[[DTYPE_DESC_SIZE_CALC_4:.*]] = sub i64 %[[DTYPE_DESC_SIZE_CALC_2]], %[[DTYPE_DESC_SIZE_CALC_3]]
-!CHECK: %[[DTYPE_DESC_SIZE_CALC_5:.*]] = sdiv exact i64 %[[DTYPE_DESC_SIZE_CALC_4]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-!CHECK: %[[DTYPE_BASE_ADDR_ACCESS_3:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[DTYPE_DESC_ALLOCA_3]], i32 1
-!CHECK: %[[DTYPE_BASE_ADDR_ACCESS_3_OFF:.*]] = getelementptr ptr, ptr %[[DTYPE_DESC_BASE_ADDR]], i32 1
-!CHECK: %[[SIZE_2_CALC_1:.*]] = ptrtoint ptr %[[DTYPE_BASE_ADDR_ACCESS_3]] to i64
-!CHECK: %[[SIZE_2_CALC_2:.*]] = ptrtoint ptr %[[DTYPE_BASE_ADDR_ACCESS_3_OFF]] to i64
-!CHECK: %[[SIZE_2_CALC_3:.*]] = sub i64 %[[SIZE_2_CALC_1]], %[[SIZE_2_CALC_2]]
-!CHECK: %[[SIZE_2_CALC_4:.*]] = sdiv exact i64 %[[SIZE_2_CALC_3]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+!CHECK:  %[[SIZE_CALC_1:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[DTYPE_DESC_ALLOCA_3]], i32 1
+!CHECK:  %[[SIZE_CALC_2:.*]] = ptrtoint ptr %[[SIZE_CALC_1]] to i64
+!CHECK:  %[[SIZE_CALC_3:.*]] = ptrtoint ptr %[[DTYPE_DESC_ALLOCA_3]] to i64
+!CHECK:  %[[SIZE_CALC_4:.*]] = sub i64 %[[SIZE_CALC_2]], %[[SIZE_CALC_3]]
+!CHECK:  %[[SIZE_CALC_5:.*]] = sdiv exact i64 %[[SIZE_CALC_4]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+!CHECK:  %[[SIZE_CALC_6:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[DTYPE_DESC_ALLOCA_3]], i32 1
+!CHECK:  %[[SIZE_CALC_7:.*]] = ptrtoint ptr %[[DTYPE_DESC_BASE_ADDR]] to i64
+!CHECK:  %[[SIZE_CALC_8:.*]] = ptrtoint ptr %[[DTYPE_DESC_ALLOCA_3]] to i64
+!CHECK:  %[[SIZE_CALC_9:.*]] = sub i64 %[[SIZE_CALC_7]], %[[SIZE_CALC_8]]
+!CHECK:  %[[CALC_SIZE:.*]] = sdiv exact i64 %[[SIZE_CALC_9]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+!CHECK:  %[[OFFLOAD_PTR:.*]] = getelementptr ptr, ptr %[[DTYPE_DESC_BASE_ADDR]], i32 1
+!CHECK:  %[[SIZE_CALC_10:.*]] = ptrtoint ptr %[[SIZE_CALC_6]] to i64
+!CHECK:  %[[SIZE_CALC_11:.*]] = ptrtoint ptr %[[OFFLOAD_PTR]] to i64
+!CHECK:  %[[SIZE_CALC_12:.*]] = sub i64 %[[SIZE_CALC_10]], %[[SIZE_CALC_11]]
+!CHECK:  %[[SIZE_CALC_13:.*]] = sdiv exact i64 %[[SIZE_CALC_12]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+!CHECK: %[[NULL_CMP:.*]] = icmp eq ptr %[[LOAD_BASE_ADDR2]], null
+!CHECK: %[[SEL_SZ:.*]] = select i1 %[[NULL_CMP]], i64 0, i64 240
 !CHECK: %[[NULL_CMP:.*]] = icmp eq ptr %[[ARRAY_OFFSET]], null
-!CHECK: %[[NULL_SEL:.*]] = select i1 %[[NULL_CMP]], i64 0, i64 %[[ALLOCATABLE_MEMBER_SIZE_CALC_5]]
+!CHECK: %[[SEL_SZ2:.*]] = select i1 %[[NULL_CMP]], i64 0, i64 %[[ALLOCATABLE_MEMBER_SIZE_CALC_5]]
 !CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
 !CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
 !CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 0
 !CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [9 x i64], ptr %.offload_sizes, i32 0, i32 0
-!CHECK: store i64 %[[DTYPE_DESC_SIZE_CALC_5]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
 !CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
 !CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
 !CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 1
 !CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [9 x i64], ptr %.offload_sizes, i32 0, i32 1
+!CHECK: store i64 %[[CALC_SIZE]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
 !CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
 !CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
 !CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 2
-!CHECK: store ptr %[[DTYPE_BASE_ADDR_ACCESS_3_OFF]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [9 x i64], ptr %.offload_sizes, i32 0, i32 2
-!CHECK: store i64 %[[SIZE_2_CALC_4]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
+!CHECK: store ptr %[[OFFLOAD_PTR]], ptr %[[OFFLOAD_PTR_ARR]], align 8
 !CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
 !CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
 !CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 3
-!CHECK: store ptr %[[DTYPE_DESC_BASE_ADDR]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: store ptr %[[LOAD_BASE_ADDR2]], ptr %[[OFFLOAD_PTR_ARR]], align 8
 !CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 4
-!CHECK: store ptr %[[DTYPE_DESC_BASE_ADDR]], ptr %[[BASE_PTR_ARR]], align 8
+!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
 !CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 4
-!CHECK: store ptr %[[LOAD_BASE_ADDR]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: store ptr %[[MAPPED_MEMBER_ACCESS]], ptr %[[OFFLOAD_PTR_ARR]], align 8
 !CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 5
-!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
+!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
 !CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 5
-!CHECK: store ptr %[[MAPPED_MEMBER_ACCESS]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: store ptr %[[ARRAY_OFFSET]], ptr %[[OFFLOAD_PTR_ARR]], align 8
 !CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 6
 !CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
 !CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 6
-!CHECK: store ptr %[[MAPPED_MEMBER_BASE_ADDR_ACCESS]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 7
-!CHECK: store ptr %[[MAPPED_MEMBER_BASE_ADDR_ACCESS]], ptr %[[BASE_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 7
-!CHECK: store ptr %[[ARRAY_OFFSET]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [9 x i64], ptr %.offload_sizes, i32 0, i32 7
-!CHECK: store i64 %[[NULL_SEL]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
-!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 8
-!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 8
 !CHECK: store ptr %[[NESTED_NONALLOCA_MEMBER_ACCESS]], ptr %[[OFFLOAD_PTR_ARR]], align 8
 
 !CHECK-LABEL: define {{.*}} @{{.*}}maptype_nested_derived_type_alloca{{.*}}
@@ -778,12 +760,14 @@ end subroutine mapType_common_block_members
 !CHECK: %[[ALLOCATABLE_MEMBER_SIZE_CALC_5:.*]] = mul i64 %[[ALLOCATABLE_MEMBER_SIZE_CALC_4]], 4
 !CHECK: %[[LOAD_BASE_ADDR:.*]] = load ptr, ptr %[[NESTED_MEMBER_BASE_ADDR_ACCESS]], align 8
 !CHECK: %[[ARR_OFFS:.*]] = getelementptr inbounds i32, ptr %[[LOAD_BASE_ADDR]], i64 0
+!CHECK: %[[LOAD_BASE_ADDR:.*]] = load ptr, ptr %[[NESTED_MEMBER_BASE_ADDR_ACCESS]], align 8
+!CHECK: %[[ARR_OFFS2:.*]] = getelementptr inbounds i32, ptr %[[LOAD_BASE_ADDR]], i64 0
 !CHECK: %[[NESTED_MEMBER_BASE_ADDR_ACCESS_2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[NESTED_MEMBER_ACCESS]], i64 1
 !CHECK: %[[DTYPE_SEGMENT_SIZE_CALC_1:.*]] = ptrtoint ptr %[[NESTED_MEMBER_BASE_ADDR_ACCESS_2]] to i64
 !CHECK: %[[DTYPE_SEGMENT_SIZE_CALC_2:.*]] = ptrtoint ptr %[[NESTED_MEMBER_ACCESS]] to i64
 !CHECK: %[[DTYPE_SEGMENT_SIZE_CALC_3:.*]] = sub i64 %[[DTYPE_SEGMENT_SIZE_CALC_1]], %[[DTYPE_SEGMENT_SIZE_CALC_2]]
 !CHECK: %[[DTYPE_SEGMENT_SIZE_CALC_4:.*]] = sdiv exact i64 %[[DTYPE_SEGMENT_SIZE_CALC_3]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-!CHECK: %[[DATA_CMP:.*]] = icmp eq ptr %[[ARR_OFFS]], null
+!CHECK: %[[DATA_CMP:.*]] = icmp eq ptr %[[ARR_OFFS2]], null
 !CHECK: %[[DATA_SEL:.*]] = select i1 %[[DATA_CMP]], i64 0, i64 %[[ALLOCATABLE_MEMBER_SIZE_CALC_5]]
 !CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
 !CHECK: store ptr %[[ALLOCA]], ptr %[[BASE_PTR_ARR]], align 8
@@ -798,13 +782,13 @@ end subroutine mapType_common_block_members
 !CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
 !CHECK: store ptr %[[ALLOCA]], ptr %[[BASE_PTR_ARR]], align 8
 !CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 2
-!CHECK: store ptr %[[NESTED_MEMBER_BASE_ADDR_ACCESS]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: store ptr %[[ARR_OFFS2]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 2
+!CHECK: store i64 %[[DATA_SEL]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
 !CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
-!CHECK: store ptr %[[NESTED_MEMBER_BASE_ADDR_ACCESS]], ptr %[[BASE_PTR_ARR]], align 8
+!CHECK: store ptr %[[NESTED_MEMBER_ACCESS]], ptr %[[BASE_PTR_ARR]], align 8
 !CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 3
 !CHECK: store ptr %[[ARR_OFFS]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 3
-!CHECK: store i64 %[[DATA_SEL]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
 
 !CHECK-LABEL: define {{.*}} @{{.*}}maptype_nested_derived_type_member_idx{{.*}}
 !CHECK: %[[ALLOCA_0:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, align 8
@@ -845,15 +829,21 @@ end subroutine mapType_common_block_members
 !CHECK: %[[ARR_OFFS:.*]] = getelementptr inbounds %_QFmaptype_nested_derived_type_member_idxTvertexes, ptr %[[LOAD_OFF_PTR]], i64 0
 !CHECK: %[[LOAD_ARR_OFFS:.*]] = load ptr, ptr %[[OFF_PTR_4]], align 8
 !CHECK: %[[ARR_OFFS_1:.*]] = getelementptr inbounds i32, ptr %[[LOAD_ARR_OFFS]], i64 0
+!CHECK: %[[LOAD_OFF_PTR:.*]] = load ptr, ptr %[[OFF_PTR_2]], align 8
+!CHECK: %[[ARR_OFFS_2:.*]] = getelementptr inbounds %_QFmaptype_nested_derived_type_member_idxTvertexes, ptr %[[LOAD_OFF_PTR]], i64 0
+!CHECK: %[[LOAD_ARR_OFFS:.*]] = load ptr, ptr %[[OFF_PTR_4]], align 8
+!CHECK: %[[ARR_OFFS_3:.*]] = getelementptr inbounds i32, ptr %[[LOAD_ARR_OFFS]], i64 0
 !CHECK: %[[SZ_CALC_1:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, ptr %[[OFF_PTR_1]], i64 1
 !CHECK: %[[SZ_CALC_2:.*]] = ptrtoint ptr %[[SZ_CALC_1]] to i64
 !CHECK: %[[SZ_CALC_3:.*]] = ptrtoint ptr %[[OFF_PTR_1]] to i64
 !CHECK: %[[SZ_CALC_4:.*]] = sub i64 %[[SZ_CALC_2]], %[[SZ_CALC_3]]
 !CHECK: %[[SZ_CALC_5:.*]] = sdiv exact i64 %[[SZ_CALC_4]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-!CHECK: %[[SIZE_CMP:.*]] = icmp eq ptr %[[ARR_OFFS]], null
+!CHECK: %[[SIZE_CMP:.*]] = icmp eq ptr %[[ARR_OFFS_2]], null
 !CHECK: %[[SIZE_SEL:.*]] = select i1 %[[SIZE_CMP]], i64 0, i64 %[[OFF_PTR_3]]
-!CHECK: %[[SIZE_CMP2:.*]] = icmp eq ptr %[[ARR_OFFS_1]], null
+!CHECK: %[[SIZE_CMP2:.*]] = icmp eq ptr %[[ARR_OFFS_3]], null
 !CHECK: %[[SIZE_SEL2:.*]] = select i1 %[[SIZE_CMP2]], i64 0, i64 %[[SZ_CALC_4_2]]
+!CHECK: %[[SIZE_CMP3:.*]] = icmp eq ptr %[[ARR_OFFS]], null
+!CHECK: %[[SIZE_SEL3:.*]] = select i1 %[[SIZE_CMP3]], i64 0, i64 64
 !CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [7 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
 !CHECK: store ptr %[[BASE_PTR_1]], ptr %[[BASE_PTR_ARR]], align 8
 !CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [7 x ptr], ptr %.offload_ptrs, i32 0, i32 0
@@ -867,32 +857,27 @@ end subroutine mapType_common_block_members
 !CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [7 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
 !CHECK: store ptr %[[BASE_PTR_1]], ptr %[[BASE_PTR_ARR]], align 8
 !CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [7 x ptr], ptr %.offload_ptrs, i32 0, i32 2
-!CHECK: store ptr %[[OFF_PTR_2]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: store ptr %[[ARR_OFFS_2]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [7 x i64], ptr %.offload_sizes, i32 0, i32 2
+!CHECK: store i64 %[[SIZE_SEL]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
 !CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [7 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
-!CHECK: store ptr %[[OFF_PTR_2]], ptr %[[BASE_PTR_ARR]], align 8
+!CHECK: store ptr %[[BASE_PTR_1]], ptr %[[BASE_PTR_ARR]], align 8
 !CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [7 x ptr], ptr %.offload_ptrs, i32 0, i32 3
-!CHECK: store ptr %[[ARR_OFFS]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [7 x i64], ptr %.offload_sizes, i32 0, i32 3
-!CHECK: store i64 %[[SIZE_SEL]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
+!CHECK: store ptr %[[SZ_CALC_8]], ptr %[[OFFLOAD_PTR_ARR]], align 8
 !CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [7 x ptr], ptr %.offload_baseptrs, i32 0, i32 4
 !CHECK: store ptr %[[BASE_PTR_1]], ptr %[[BASE_PTR_ARR]], align 8
 !CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [7 x ptr], ptr %.offload_ptrs, i32 0, i32 4
-!CHECK: store ptr %[[SZ_CALC_8]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: store ptr %[[ARR_OFFS_3]], ptr %[[OFFLOAD_PTR_ARR]], align 8
 !CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [7 x ptr], ptr %.offload_baseptrs, i32 0, i32 5
-!CHECK: store ptr %[[BASE_PTR_1]], ptr %[[BASE_PTR_ARR]], align 8
+!CHECK: store ptr %[[OFF_PTR_1]], ptr %[[BASE_PTR_ARR]], align 8
 !CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [7 x ptr], ptr %.offload_ptrs, i32 0, i32 5
-!CHECK: store ptr %[[OFF_PTR_4]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: store ptr %[[ARR_OFFS]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [7 x i64], ptr %.offload_sizes, i32 0, i32 5
+!CHECK: store i64 %[[SIZE_SEL3]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
 !CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [7 x ptr], ptr %.offload_baseptrs, i32 0, i32 6
-!CHECK: store ptr %[[OFF_PTR_4]], ptr %[[BASE_PTR_ARR]], align 8
+!CHECK: store ptr %[[SZ_CALC_8]], ptr %[[BASE_PTR_ARR]], align 8
 !CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [7 x ptr], ptr %.offload_ptrs, i32 0, i32 6
 !CHECK: store ptr %[[ARR_OFFS_1]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [7 x i64], ptr %.offload_sizes, i32 0, i32 6
-!CHECK: store i64 %[[SIZE_SEL2]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
-!CHECK-LABEL: define {{.*}} @{{.*}}maptype_common_block_{{.*}}
-!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [1 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
-!CHECK: store ptr @var_common_, ptr %[[BASE_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [1 x ptr], ptr %.offload_ptrs, i32 0, i32 0
-!CHECK: store ptr @var_common_, ptr %[[OFFLOAD_PTR_ARR]], align 8
 
 !CHECK-LABEL: define {{.*}} @{{.*}}maptype_common_block_members_{{.*}}
 !CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index c72a93bfe92df..a7bf028b308ba 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -4370,6 +4370,11 @@ static llvm::Value *getSizeInBytes(DataLayout &dl, const mlir::Type &type,
   return builder.getInt64(dl.getTypeSizeInBits(type) / 8);
 }
 
+static bool checkHasClauseMapFlag(omp::ClauseMapFlags flag,
+                                  omp::ClauseMapFlags checkFlag) {
+  return (flag & checkFlag) == checkFlag;
+}
+
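
As a side note on the helper above: the (flag & checkFlag) == checkFlag form is an all-bits-set test, which matters once multi-bit query masks (e.g. a combined to|from check) are involved. A standalone sketch with plain integers, not the dialect enum:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t TO = 1, FROM = 2, TOFROM = TO | FROM;
      auto hasAll = [](uint32_t f, uint32_t c) { return (f & c) == c; };
      assert(hasAll(TO | FROM, TOFROM)); // all bits present: matches
      assert(!hasAll(TO, TOFROM));       // subset only: correctly rejected
      assert((TO & TOFROM) != 0);        // an any-bit test would wrongly match
    }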
 // Convert the MLIR map flag set to the runtime map flag set for embedding
 // in LLVM-IR. This is important as the two bit-flag lists do not correspond
 // 1-to-1, as there are flags the runtime doesn't care about and vice versa.
@@ -4386,40 +4391,40 @@ convertClauseMapFlags(omp::ClauseMapFlags mlirFlags) {
   llvm::omp::OpenMPOffloadMappingFlags mapType =
       llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE;
 
-  if (mapTypeToBool(omp::ClauseMapFlags::to))
+  if (checkHasClauseMapFlag(mlirFlags, omp::ClauseMapFlags::to))
     mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
 
-  if (mapTypeToBool(omp::ClauseMapFlags::from))
+  if (checkHasClauseMapFlag(mlirFlags, omp::ClauseMapFlags::from))
     mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
 
-  if (mapTypeToBool(omp::ClauseMapFlags::always))
+  if (checkHasClauseMapFlag(mlirFlags, omp::ClauseMapFlags::always))
     mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
 
-  if (mapTypeToBool(omp::ClauseMapFlags::del))
+  if (checkHasClauseMapFlag(mlirFlags, omp::ClauseMapFlags::del))
     mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
 
-  if (mapTypeToBool(omp::ClauseMapFlags::return_param))
+  if (checkHasClauseMapFlag(mlirFlags, omp::ClauseMapFlags::return_param))
     mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
 
-  if (mapTypeToBool(omp::ClauseMapFlags::priv))
+  if (checkHasClauseMapFlag(mlirFlags, omp::ClauseMapFlags::priv))
     mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE;
 
-  if (mapTypeToBool(omp::ClauseMapFlags::literal))
+  if (checkHasClauseMapFlag(mlirFlags, omp::ClauseMapFlags::literal))
     mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
 
-  if (mapTypeToBool(omp::ClauseMapFlags::implicit))
+  if (checkHasClauseMapFlag(mlirFlags, omp::ClauseMapFlags::implicit))
     mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
 
-  if (mapTypeToBool(omp::ClauseMapFlags::close))
+  if (checkHasClauseMapFlag(mlirFlags, omp::ClauseMapFlags::close))
     mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
 
-  if (mapTypeToBool(omp::ClauseMapFlags::present))
+  if (checkHasClauseMapFlag(mlirFlags, omp::ClauseMapFlags::present))
     mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
 
-  if (mapTypeToBool(omp::ClauseMapFlags::ompx_hold))
+  if (checkHasClauseMapFlag(mlirFlags, omp::ClauseMapFlags::ompx_hold))
     mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
 
-  if (mapTypeToBool(omp::ClauseMapFlags::attach))
+  if (checkHasClauseMapFlag(mlirFlags, omp::ClauseMapFlags::attach))
     mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ATTACH;
 
   if (mapTypeToBool(omp::ClauseMapFlags::is_device_ptr)) {
@@ -4437,6 +4442,16 @@ static void collectMapDataFromMapOperands(
     llvm::IRBuilderBase &builder, ArrayRef<Value> useDevPtrOperands = {},
     ArrayRef<Value> useDevAddrOperands = {},
     ArrayRef<Value> hasDevAddrOperands = {}) {
+
+  auto checkRefPtrOrPteeMapWithAttach = [](omp::ClauseMapFlags mapType) {
+    bool hasRefType =
+        checkHasClauseMapFlag(mapType, omp::ClauseMapFlags::ref_ptr) ||
+        checkHasClauseMapFlag(mapType, omp::ClauseMapFlags::ref_ptee) ||
+        checkHasClauseMapFlag(mapType, omp::ClauseMapFlags::ref_ptr_ptee);
+    return hasRefType &&
+           checkHasClauseMapFlag(mapType, omp::ClauseMapFlags::attach);
+  };
+
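
To illustrate the predicate above with plain integers (a standalone sketch; the bit assignments are hypothetical stand-ins for the dialect's flags and only the combinations matter):

    #include <cassert>
    #include <cstdint>

    int main() {
      // Hypothetical stand-ins for ref_ptr/ref_ptee/ref_ptr_ptee/attach/to.
      const uint32_t ref_ptr = 1, ref_ptee = 2, ref_ptr_ptee = 4, attach = 8,
                     to = 16;
      auto refWithAttach = [&](uint32_t f) {
        bool hasRefType = (f & ref_ptr) || (f & ref_ptee) || (f & ref_ptr_ptee);
        return hasRefType && (f & attach) == attach;
      };
      assert(refWithAttach(ref_ptr | attach)); // pointer component + attach
      assert(!refWithAttach(ref_ptee | to));   // pointee without attach
      assert(!refWithAttach(attach));          // attach without a ref flag
    }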
   auto checkIsAMember = [](const auto &mapVars, auto mapOp) {
     // Check if this is a member mapping and correctly assign that it is, if
     // it is a member of a larger object.
@@ -4457,10 +4472,16 @@ static void collectMapDataFromMapOperands(
   // Process MapOperands
   for (Value mapValue : mapVars) {
     auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
-    Value offloadPtr =
-        mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
+    bool isRefPtrOrPteeMapWithAttach =
+        checkRefPtrOrPteeMapWithAttach(mapOp.getMapType());
+    Value offloadPtr = (mapOp.getVarPtrPtr() && !isRefPtrOrPteeMapWithAttach)
+                           ? mapOp.getVarPtrPtr()
+                           : mapOp.getVarPtr();
     mapData.OriginalValue.push_back(moduleTranslation.lookupValue(offloadPtr));
-    mapData.Pointers.push_back(mapData.OriginalValue.back());
+    mapData.Pointers.push_back(
+        isRefPtrOrPteeMapWithAttach
+            ? moduleTranslation.lookupValue(mapOp.getVarPtrPtr())
+            : mapData.OriginalValue.back());
 
     if (llvm::Value *refPtr =
             getRefPtrIfDeclareTarget(offloadPtr, moduleTranslation)) {
@@ -4477,12 +4498,22 @@ static void collectMapDataFromMapOperands(
     // In every situation we currently have if we have a varPtrPtr present
     // we wish to utilise its type for the base type; main cases are
     // currently Fortran descriptor base address maps and attach maps.
-    mlir::Type baseTy = mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtrType().value()
-                                             : mapOp.getVarPtrType();
-    mapData.BaseType.push_back(moduleTranslation.convertType(baseTy));
-    mapData.Sizes.push_back(
-        getSizeInBytes(dl, baseTy, mapOp, mapData.Pointers.back(),
-                       mapData.BaseType.back(), builder, moduleTranslation));
+    mapData.BaseType.push_back(moduleTranslation.convertType(
+        mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtrType().value()
+                             : mapOp.getVarPtrType()));
+
+    // The attach map cases are a little odd, as we effectively have to
+    // utilise the base address (including all bounds offsets) for the pointer
+    // field, the pointer address for the base address field, and the size of
+    // the pointer rather than of the data (base address). So we end up with a
+    // mix of base types and sizes we wish to insert here.
+    mlir::Type sizeType = (isRefPtrOrPteeMapWithAttach || !mapOp.getVarPtrPtr())
+                              ? mapOp.getVarPtrType()
+                              : mapOp.getVarPtrPtrType().value();
+    mapData.Sizes.push_back(getSizeInBytes(
+        dl, sizeType, isRefPtrOrPteeMapWithAttach ? nullptr : mapOp,
+        mapData.Pointers.back(), moduleTranslation.convertType(sizeType),
+        builder, moduleTranslation));
     mapData.MapClause.push_back(mapOp.getOperation());
     mapData.Types.push_back(convertClauseMapFlags(mapOp.getMapType()));
     mapData.Names.push_back(LLVM::createMappingInformation(
@@ -4499,11 +4530,18 @@ static void collectMapDataFromMapOperands(
   }
 
   auto findMapInfo = [&mapData](llvm::Value *val,
-                                llvm::OpenMPIRBuilder::DeviceInfoTy devInfoTy) {
+                                llvm::OpenMPIRBuilder::DeviceInfoTy devInfoTy,
+                                size_t memberCount) {
     unsigned index = 0;
     bool found = false;
     for (llvm::Value *basePtr : mapData.OriginalValue) {
-      if (basePtr == val && mapData.IsAMapping[index]) {
+      auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[index]);
+      // TODO/FIXME: Currently we define an equivalent mapping as
+      // the same base pointer and an equivalent member count, but
+      // that is a loose definition; we may have to extend it to
+      // check other fields (varPtrPtr/individual members being mapped).
+      if (basePtr == val && mapData.IsAMapping[index] &&
+          memberCount == mapOp.getMembers().size()) {
         found = true;
         mapData.Types[index] |=
             llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
@@ -4524,15 +4562,31 @@ static void collectMapDataFromMapOperands(
       llvm::Value *origValue = moduleTranslation.lookupValue(offloadPtr);
 
       // Check if map info is already present for this entry.
-      if (!findMapInfo(origValue, devInfoTy)) {
+      if (!findMapInfo(origValue, devInfoTy, mapOp.getMembers().size())) {
         mapData.OriginalValue.push_back(origValue);
         mapData.Pointers.push_back(mapData.OriginalValue.back());
         mapData.IsDeclareTarget.push_back(false);
         mapData.BasePointers.push_back(mapData.OriginalValue.back());
-        mapData.BaseType.push_back(moduleTranslation.convertType(
-            mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtrType().value()
-                                 : mapOp.getVarPtrType()));
-        mapData.Sizes.push_back(builder.getInt64(0));
+        mlir::Type baseTy = mapOp.getVarPtrPtr()
+                                ? mapOp.getVarPtrPtrType().value()
+                                : mapOp.getVarPtrType();
+        mapData.BaseType.push_back(moduleTranslation.convertType(baseTy));
+
+        // If this is an attach map, we currently need to maintain the size,
+        // even if we're not sending data, as the runtime (at least currently)
+        // expects a size greater than 0. An alternative may be to skip attach
+        // maps when applied to use_dev_ptr/addr and no other map type is
+        // present.
+        if ((convertClauseMapFlags(mapOp.getMapType()) &
+             llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ATTACH) ==
+            llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ATTACH) {
+          mapData.Sizes.push_back(getSizeInBytes(
+              dl, baseTy, mapOp, mapData.Pointers.back(),
+              mapData.BaseType.back(), builder, moduleTranslation));
+        } else {
+          mapData.Sizes.push_back(builder.getInt64(0));
+        }
+
         mapData.MapClause.push_back(mapOp.getOperation());
         mapData.Types.push_back(
             llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
@@ -4829,6 +4883,93 @@ static bool checkIfPointerMap(omp::MapInfoOp mapOp) {
   return false;
 }
 
+static void
+processIndividualMap(llvm::IRBuilderBase &builder,
+                     llvm::OpenMPIRBuilder &ompBuilder, MapInfoData &mapData,
+                     size_t mapDataIdx, MapInfosTy &combinedInfo,
+                     TargetDirectiveEnumTy targetDirective,
+                     llvm::omp::OpenMPOffloadMappingFlags memberOfFlag =
+                         llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE,
+                     bool isTargetParam = true, int mapDataParentIdx = -1) {
+  auto mapFlag = mapData.Types[mapDataIdx];
+  auto mapInfoOp = llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIdx]);
+
+  bool isPtrTy = checkIfPointerMap(mapInfoOp);
+  bool isAttachMap = ((convertClauseMapFlags(mapInfoOp.getMapType()) &
+                       llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ATTACH) ==
+                      llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ATTACH);
+
+  // Declare Target Mappings are excluded from being marked as
+  // OMP_MAP_TARGET_PARAM as they are not passed as parameters, they're
+  // marked with OMP_MAP_PTR_AND_OBJ instead. It's also the only type of
+  // mapping that currently utilises OMP_MAP_PTR_AND_OBJ after moving to
+  // attach map semantics.
+  if (isPtrTy && mapData.IsDeclareTarget[mapDataIdx])
+    mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
+
+  // Declare target variables are not passed to the kernel, and for the moment
+  // neither are attach maps. It is possible to create attach maps that
+  // transfer data and can thus be kernel arguments, but our existing frontend
+  // does not do this.
+  if (isTargetParam &&
+      (targetDirective == TargetDirectiveEnumTy::Target &&
+       !mapData.IsDeclareTarget[mapDataIdx]) &&
+      !isAttachMap)
+    mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
+
+  if (mapInfoOp.getMapCaptureType() == omp::VariableCaptureKind::ByCopy &&
+      !isPtrTy)
+    mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
+
+  // If we have a pointer and it's part of a MEMBER_OF mapping, we do not apply
+  // MEMBER_OF, as the runtime currently has a work-around that utilises
+  // MEMBER_OF instead of TARGET_PARAM to prevent reference updating in certain
+  // scenarios. However, this causes a noticeable issue in cases where we map
+  // some data (primarily Fortran descriptors at the moment), alter it on the
+  // host, and then expect it not to be updated in a subsequent implicit map
+  // (such as an implicit map on a target).
+  if (memberOfFlag != llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE) {
+    if (!isPtrTy && !isAttachMap)
+      ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
+
+    // The return parameter should be the overriding parent in cases where we
+    // have a return parameter that is echoed to all members; the main case of
+    // this currently is Fortran descriptors. It may need more finessing for
+    // C/C++ in the future, or for descriptors that are members of derived
+    // types.
+    mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
+  }
+
+  // If we're provided a mapDataParentIdx, then the data being mapped is
+  // part of a larger object (in a parent <-> member mapping) and in this
+  // case our BasePointer should be the parent, except in the edge case
+  // where we are mapping pointee data. In that case we try to stay close
+  // to what Clang currently does and utilise the regular base pointer of
+  // the data.
+  if (mapDataParentIdx >= 0 &&
+      !(checkHasClauseMapFlag(mapInfoOp.getMapType(),
+                              omp::ClauseMapFlags::ref_ptee) ||
+        (checkHasClauseMapFlag(mapInfoOp.getMapType(),
+                               omp::ClauseMapFlags::ref_ptr_ptee) &&
+         isPtrTy))) {
+    combinedInfo.BasePointers.emplace_back(
+        mapData.BasePointers[mapDataParentIdx]);
+  } else {
+    combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIdx]);
+  }
+
+  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIdx]);
+  combinedInfo.DevicePointers.emplace_back(mapData.DevicePointers[mapDataIdx]);
+  combinedInfo.Mappers.emplace_back(mapData.Mappers[mapDataIdx]);
+  combinedInfo.Names.emplace_back(mapData.Names[mapDataIdx]);
+  combinedInfo.Types.emplace_back(mapFlag);
+  combinedInfo.Sizes.emplace_back(
+      isPtrTy ? builder.CreateSelect(
+                    builder.CreateIsNull(mapData.Pointers[mapDataIdx]),
+                    builder.getInt64(0), mapData.Sizes[mapDataIdx])
+              : mapData.Sizes[mapDataIdx]);
+}
+
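
One detail worth calling out in processIndividualMap above is the size emission at the end: for pointer-typed maps the size is null-guarded, so an unallocated allocatable/pointer transfers nothing. A minimal IRBuilder sketch of just that pattern (the helper name is illustrative):

    #include "llvm/IR/IRBuilder.h"

    // Yields `size` when `ptr` is non-null and an i64 0 otherwise, mirroring
    // the CreateSelect/CreateIsNull pair used for pointer-typed map entries.
    static llvm::Value *emitNullGuardedSize(llvm::IRBuilderBase &builder,
                                            llvm::Value *ptr,
                                            llvm::Value *size) {
      return builder.CreateSelect(builder.CreateIsNull(ptr),
                                  builder.getInt64(0), size);
    }

This is the same select the updated FileCheck tests match as %[[SEL_SZ]]/%[[DATA_SEL]].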
 // This creates two insertions into the MapInfosTy data structure for the
 // "parent" of a set of members, (usually a container e.g.
 // class/structure/derived type) when subsequent members have also been
@@ -4844,38 +4985,42 @@ static bool checkIfPointerMap(omp::MapInfoOp mapOp) {
 //
 // This function borrows a lot from Clang's emitCombinedEntry function
 // inside of CGOpenMPRuntime.cpp
-static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
+static void mapParentWithMembers(
     LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
     llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo,
     MapInfoData &mapData, uint64_t mapDataIndex,
+    llvm::omp::OpenMPOffloadMappingFlags memberOfFlag,
     TargetDirectiveEnumTy targetDirective) {
+  using mapFlags = llvm::omp::OpenMPOffloadMappingFlags;
   assert(!ompBuilder.Config.isTargetDevice() &&
          "function only supported for host device codegen");
-
   auto parentClause =
       llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
-
   auto *parentMapper = mapData.Mappers[mapDataIndex];
 
   // Map the first segment of the parent. If a user-defined mapper is attached,
   // include the parent's to/from-style bits (and common modifiers) in this
   // base entry so the mapper receives correct copy semantics via its 'type'
   // parameter. Also keep TARGET_PARAM when required for kernel arguments.
-  llvm::omp::OpenMPOffloadMappingFlags baseFlag =
-      (targetDirective == TargetDirectiveEnumTy::Target &&
-       !mapData.IsDeclareTarget[mapDataIndex])
-          ? llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM
-          : llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE;
+  mapFlags baseFlag = (targetDirective == TargetDirectiveEnumTy::Target &&
+                       !mapData.IsDeclareTarget[mapDataIndex])
+                          ? mapFlags::OMP_MAP_TARGET_PARAM
+                          : mapFlags::OMP_MAP_NONE;
 
   if (parentMapper) {
-    using mapFlags = llvm::omp::OpenMPOffloadMappingFlags;
     // Preserve relevant map-type bits from the parent clause. These include
     // the copy direction (TO/FROM), as well as commonly used modifiers that
     // should be visible to the mapper for correct behaviour.
     mapFlags parentFlags = mapData.Types[mapDataIndex];
     mapFlags preserve = mapFlags::OMP_MAP_TO | mapFlags::OMP_MAP_FROM |
                         mapFlags::OMP_MAP_ALWAYS | mapFlags::OMP_MAP_CLOSE |
-                        mapFlags::OMP_MAP_PRESENT | mapFlags::OMP_MAP_OMPX_HOLD;
+                        mapFlags::OMP_MAP_PRESENT |
+                        mapFlags::OMP_MAP_OMPX_HOLD |
+                        mapFlags::OMP_MAP_IMPLICIT;
+    baseFlag |= (parentFlags & preserve);
+  } else {
+    mapFlags parentFlags = mapData.Types[mapDataIndex];
+    mapFlags preserve = mapFlags::OMP_MAP_PRESENT;
     baseFlag |= (parentFlags & preserve);
   }
 
@@ -4911,15 +5056,32 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
     auto mapOp = dyn_cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
     int firstMemberIdx = getMapDataMemberIdx(
         mapData, getFirstOrLastMappedMemberPtr(mapOp, true));
-    lowAddr = builder.CreatePointerCast(mapData.Pointers[firstMemberIdx],
+    lowAddr = builder.CreatePointerCast(mapData.BasePointers[firstMemberIdx],
                                         builder.getPtrTy());
+
     int lastMemberIdx = getMapDataMemberIdx(
         mapData, getFirstOrLastMappedMemberPtr(mapOp, false));
+    auto lastMemberMapInfo =
+        cast<omp::MapInfoOp>(mapData.MapClause[lastMemberIdx]);
+
+    // NOTE: Currently, for RefPtee the BaseType is set to the varPtrPtr field,
+    // which is the pointer data's type and not the member within the structure
+    // that it's part of, so we have to make sure we use the member type in this
+    // case when calculating the parent's size offsets.
+    // TODO: It may be good to extend MapInfoData to support tracking both the
+    // VarPtr and VarPtrPtr BaseTypes to better distinguish what's being used
+    // more consistently.
+    bool isRefPteeMap = checkHasClauseMapFlag(lastMemberMapInfo.getMapType(),
+                                              omp::ClauseMapFlags::ref_ptee);
+    llvm::Type *castType = mapData.BaseType[lastMemberIdx];
+    if (isRefPteeMap)
+      castType =
+          moduleTranslation.convertType(lastMemberMapInfo.getVarPtrType());
     highAddr = builder.CreatePointerCast(
-        builder.CreateGEP(mapData.BaseType[lastMemberIdx],
-                          mapData.Pointers[lastMemberIdx], builder.getInt64(1)),
+        builder.CreateGEP(castType, mapData.BasePointers[lastMemberIdx],
+                          builder.getInt64(1)),
         builder.getPtrTy());
-    combinedInfo.Pointers.emplace_back(mapData.Pointers[firstMemberIdx]);
+    combinedInfo.Pointers.emplace_back(mapData.BasePointers[firstMemberIdx]);
   }
 
   llvm::Value *size = builder.CreateIntCast(
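
The size entry built here covers the parent from its first mapped member to
one element past its last mapped member. Conceptually (a sketch; the patch
emits the equivalent ptrtoint/sub/sdiv sequence):

    // highAddr = &lastMember + 1, computed in the member's own type, or in
    // the pointee type for ref_ptee maps as per the NOTE above.
    llvm::Value *parentBytes =
        builder.CreatePtrDiff(builder.getInt8Ty(), highAddr, lowAddr);
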
@@ -4928,9 +5090,6 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
       /*isSigned=*/false);
   combinedInfo.Sizes.push_back(size);
 
-  llvm::omp::OpenMPOffloadMappingFlags memberOfFlag =
-      ompBuilder.getMemberOfFlag(combinedInfo.BasePointers.size() - 1);
-
   // This creates the initial MEMBER_OF mapping that consists of
   // the parent/top level container (same as above effectively, except
   // with a fixed initial compile time size and separate maptype which
@@ -4942,10 +5101,9 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
     // for the map flags that Clang currently supports (e.g. it should do some
     // further case specific flag modifications). For the moment, it handles
     // what we support as expected.
-    llvm::omp::OpenMPOffloadMappingFlags mapFlag = mapData.Types[mapDataIndex];
-    bool hasMapClose = (llvm::omp::OpenMPOffloadMappingFlags(mapFlag) &
-                        llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE) ==
-                       llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
+    mapFlags mapFlag = mapData.Types[mapDataIndex];
+    bool hasMapClose = (mapFlags(mapFlag) & mapFlags::OMP_MAP_CLOSE) ==
+                       mapFlags::OMP_MAP_CLOSE;
     ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
 
     if (targetDirective == TargetDirectiveEnumTy::TargetUpdate || hasMapClose) {
@@ -4980,7 +5138,7 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
       // It appears to be an optimisation rather than a necessity, though
       // this requires further investigation. However, we would have to make
       // sure to not exclude maps with bounds that ARE pointers, as these are
-      // processed as separate components, i.e. pointer + data.
+      // processed as separate components, i.e. pointer + data.
       for (auto v : overlapIdxs) {
         auto mapDataOverlapIdx = getMapDataMemberIdx(
             mapData,
@@ -5021,130 +5179,6 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
           builder.getInt64Ty(), true));
     }
   }
-  return memberOfFlag;
-}
-
-// This function is intended to add explicit mappings of members
-static void processMapMembersWithParent(
-    LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
-    llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo,
-    MapInfoData &mapData, uint64_t mapDataIndex,
-    llvm::omp::OpenMPOffloadMappingFlags memberOfFlag,
-    TargetDirectiveEnumTy targetDirective) {
-  assert(!ompBuilder.Config.isTargetDevice() &&
-         "function only supported for host device codegen");
-
-  auto parentClause =
-      llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
-
-  for (auto mappedMembers : parentClause.getMembers()) {
-    auto memberClause =
-        llvm::cast<omp::MapInfoOp>(mappedMembers.getDefiningOp());
-    int memberDataIdx = getMapDataMemberIdx(mapData, memberClause);
-
-    assert(memberDataIdx >= 0 && "could not find mapped member of structure");
-
-    // If we're currently mapping a pointer to a block of data, we must
-    // initially map the pointer, and then attatch/bind the data with a
-    // subsequent map to the pointer. This segment of code generates the
-    // pointer mapping, which can in certain cases be optimised out as Clang
-    // currently does in its lowering. However, for the moment we do not do so,
-    // in part as we currently have substantially less information on the data
-    // being mapped at this stage.
-    if (checkIfPointerMap(memberClause)) {
-      auto mapFlag = convertClauseMapFlags(memberClause.getMapType());
-      mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
-      mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
-      ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
-      combinedInfo.Types.emplace_back(mapFlag);
-      combinedInfo.DevicePointers.emplace_back(
-          llvm::OpenMPIRBuilder::DeviceInfoTy::None);
-      combinedInfo.Mappers.emplace_back(nullptr);
-      combinedInfo.Names.emplace_back(
-          LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
-      combinedInfo.BasePointers.emplace_back(
-          mapData.BasePointers[mapDataIndex]);
-      combinedInfo.Pointers.emplace_back(mapData.BasePointers[memberDataIdx]);
-      combinedInfo.Sizes.emplace_back(builder.getInt64(
-          moduleTranslation.getLLVMModule()->getDataLayout().getPointerSize()));
-    }
-
-    // Same MemberOfFlag to indicate its link with parent and other members
-    // of.
-    auto mapFlag = convertClauseMapFlags(memberClause.getMapType());
-    mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
-    mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
-    ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
-    bool isDeclTargetTo = isDeclareTargetTo(parentClause.getVarPtr()
-                                                ? parentClause.getVarPtr()
-                                                : parentClause.getVarPtrPtr());
-    if (checkIfPointerMap(memberClause) &&
-        (!isDeclTargetTo ||
-         (targetDirective != TargetDirectiveEnumTy::TargetUpdate &&
-          targetDirective != TargetDirectiveEnumTy::TargetData))) {
-      mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
-    }
-
-    combinedInfo.Types.emplace_back(mapFlag);
-    combinedInfo.DevicePointers.emplace_back(
-        mapData.DevicePointers[memberDataIdx]);
-    combinedInfo.Mappers.emplace_back(mapData.Mappers[memberDataIdx]);
-    combinedInfo.Names.emplace_back(
-        LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
-    uint64_t basePointerIndex =
-        checkIfPointerMap(memberClause) ? memberDataIdx : mapDataIndex;
-    combinedInfo.BasePointers.emplace_back(
-        mapData.BasePointers[basePointerIndex]);
-    combinedInfo.Pointers.emplace_back(mapData.Pointers[memberDataIdx]);
-
-    llvm::Value *size = mapData.Sizes[memberDataIdx];
-    if (checkIfPointerMap(memberClause)) {
-      size = builder.CreateSelect(
-          builder.CreateIsNull(mapData.Pointers[memberDataIdx]),
-          builder.getInt64(0), size);
-    }
-
-    combinedInfo.Sizes.emplace_back(size);
-  }
-}
-
-static void processIndividualMap(MapInfoData &mapData, size_t mapDataIdx,
-                                 MapInfosTy &combinedInfo,
-                                 TargetDirectiveEnumTy targetDirective,
-                                 int mapDataParentIdx = -1) {
-  // Declare Target Mappings are excluded from being marked as
-  // OMP_MAP_TARGET_PARAM as they are not passed as parameters, they're
-  // marked with OMP_MAP_PTR_AND_OBJ instead.
-  auto mapFlag = mapData.Types[mapDataIdx];
-  auto mapInfoOp = llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIdx]);
-
-  bool isPtrTy = checkIfPointerMap(mapInfoOp);
-  if (isPtrTy)
-    mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
-
-  if (targetDirective == TargetDirectiveEnumTy::Target &&
-      !mapData.IsDeclareTarget[mapDataIdx])
-    mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
-
-  if (mapInfoOp.getMapCaptureType() == omp::VariableCaptureKind::ByCopy &&
-      !isPtrTy)
-    mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
-
-  // if we're provided a mapDataParentIdx, then the data being mapped is
-  // part of a larger object (in a parent <-> member mapping) and in this
-  // case our BasePointer should be the parent.
-  if (mapDataParentIdx >= 0)
-    combinedInfo.BasePointers.emplace_back(
-        mapData.BasePointers[mapDataParentIdx]);
-  else
-    combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIdx]);
-
-  combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIdx]);
-  combinedInfo.DevicePointers.emplace_back(mapData.DevicePointers[mapDataIdx]);
-  combinedInfo.Mappers.emplace_back(mapData.Mappers[mapDataIdx]);
-  combinedInfo.Names.emplace_back(mapData.Names[mapDataIdx]);
-  combinedInfo.Types.emplace_back(mapFlag);
-  combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIdx]);
 }
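
The replacement processIndividualMap itself is not shown in this excerpt.
Inferred from the call sites in this patch, its signature is presumably
along these lines (a hypothetical reconstruction; parameter names are
guesses):

    static void processIndividualMap(
        llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder,
        MapInfoData &mapData, size_t mapDataIdx, MapInfosTy &combinedInfo,
        TargetDirectiveEnumTy targetDirective,
        llvm::omp::OpenMPOffloadMappingFlags memberOfFlag =
            llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE,
        bool replacesParent = false, // exact meaning not shown here
        int mapDataParentIdx = -1);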
 
 static void processMapWithMembersOf(LLVM::ModuleTranslation &moduleTranslation,
@@ -5176,18 +5210,41 @@ static void processMapWithMembersOf(LLVM::ModuleTranslation &moduleTranslation,
     // Clang maps arrays without bounds as pointers (which we do not
     // currently do), whereas we currently treat them as arrays in all
     // cases.
-    processIndividualMap(mapData, memberDataIdx, combinedInfo, targetDirective,
-                         mapDataIndex);
+    processIndividualMap(builder, ompBuilder, mapData, memberDataIdx,
+                         combinedInfo, targetDirective,
+                         llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE,
+                         true, mapDataIndex);
     return;
   }
 
-  llvm::omp::OpenMPOffloadMappingFlags memberOfParentFlag =
+  auto collectMapInfoIdxs =
+      [&](llvm::SmallVectorImpl<int64_t> &mapsAndInfoIdx) {
+        auto parentClause =
+            llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
+        mapsAndInfoIdx.push_back(getMapDataMemberIdx(mapData, parentClause));
+        for (auto member : parentClause.getMembers())
+          mapsAndInfoIdx.push_back(getMapDataMemberIdx(
+              mapData, llvm::cast<omp::MapInfoOp>(member.getDefiningOp())));
+      };
+
+  llvm::SmallVector<int64_t> mapInfoIdx;
+  collectMapInfoIdxs(mapInfoIdx);
+
+  llvm::omp::OpenMPOffloadMappingFlags memberOfFlag =
+      ompBuilder.getMemberOfFlag(combinedInfo.Types.size());
+  for (size_t i = 0; i < mapInfoIdx.size(); i++) {
+    // Index == 0 is the parent map; if it gets here, it's an unattachable
+    // type and should have OMP_MAP_TARGET_PARAM applied and no MEMBER_OF
+    // flag.
+    if (i == 0) {
       mapParentWithMembers(moduleTranslation, builder, ompBuilder, dl,
-                           combinedInfo, mapData, mapDataIndex,
+                           combinedInfo, mapData, mapInfoIdx[i], memberOfFlag,
                            targetDirective);
-  processMapMembersWithParent(moduleTranslation, builder, ompBuilder, dl,
-                              combinedInfo, mapData, mapDataIndex,
-                              memberOfParentFlag, targetDirective);
+    } else {
+      processIndividualMap(builder, ompBuilder, mapData, mapInfoIdx[i],
+                           combinedInfo, targetDirective, memberOfFlag, false,
+                           mapDataIndex);
+    }
+  }
 }
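
For reference, getMemberOfFlag encodes the one-based position of the parent
entry in the top sixteen bits of the map type. A short worked example tying
this to the test constants in this patch:

    // memberOf = (parentPosition + 1) << 48
    // parentPosition == 0  ->  1 << 48 = 281474976710656, so a member
    // entry carrying ALWAYS|TO (0x5) lowers to 281474976710661, as seen
    // in the updated CHECK lines below.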
 
 // This is a variation on Clang's GenerateOpenMPCapturedVars, which
@@ -5201,9 +5258,17 @@ createAlteredByCaptureMap(MapInfoData &mapData,
   assert(!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() &&
          "function only supported for host device codegen");
   for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
-    // if it's declare target, skip it, it's handled separately.
-    if (!mapData.IsDeclareTarget[i]) {
-      auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
+    auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
+    bool isAttachMap =
+        ((convertClauseMapFlags(mapOp.getMapType()) &
+          llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ATTACH) ==
+         llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ATTACH);
+
+    // If it's declare target, skip it; it's handled separately. However, if
+    // it's declare target and also an attach map, we want to calculate the
+    // exact address offset so that we attach correctly.
+    if (!mapData.IsDeclareTarget[i] ||
+        (mapData.IsDeclareTarget[i] && isAttachMap)) {
       omp::VariableCaptureKind captureKind = mapOp.getMapCaptureType();
       bool isPtrTy = checkIfPointerMap(mapOp);
 
@@ -5290,8 +5355,6 @@ static void genMapInfos(llvm::IRBuilderBase &builder,
   // utilise the size from any component of MapInfoData, if we can't
   // something is missing from the initial MapInfoData construction.
   for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
-    // NOTE/TODO: We currently do not support arbitrary depth record
-    // type mapping.
     if (mapData.IsAMember[i])
       continue;
 
@@ -5302,7 +5365,8 @@ static void genMapInfos(llvm::IRBuilderBase &builder,
       continue;
     }
 
-    processIndividualMap(mapData, i, combinedInfo, targetDirective);
+    processIndividualMap(builder, *ompBuilder, mapData, i, combinedInfo,
+                         targetDirective);
   }
 }
 
@@ -5845,40 +5909,43 @@ handleDeclareTargetMapVar(MapInfoData &mapData,
     // function to link the two variables in the runtime and then both the
     // reference pointer and the pointer are assigned in the kernel argument
     // structure for the host.
-    if (mapData.IsDeclareTarget[i]) {
-      // If the original map value is a constant, then we have to make sure all
-      // of it's uses within the current kernel/function that we are going to
-      // rewrite are converted to instructions, as we will be altering the old
-      // use (OriginalValue) from a constant to an instruction, which will be
-      // illegal and ICE the compiler if the user is a constant expression of
-      // some kind e.g. a constant GEP.
-      if (auto *constant = dyn_cast<llvm::Constant>(mapData.OriginalValue[i]))
-        convertUsersOfConstantsToInstructions(constant, func, false);
-
-      // The users iterator will get invalidated if we modify an element,
-      // so we populate this vector of uses to alter each user on an
-      // individual basis to emit its own load (rather than one load for
-      // all).
-      llvm::SmallVector<llvm::User *> userVec;
-      for (llvm::User *user : mapData.OriginalValue[i]->users())
-        userVec.push_back(user);
-
-      for (llvm::User *user : userVec) {
-        if (auto *insn = dyn_cast<llvm::Instruction>(user)) {
-          if (insn->getFunction() == func) {
-            auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
-            llvm::Value *substitute = mapData.BasePointers[i];
-            if (isDeclareTargetLink(mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr()
-                                                         : mapOp.getVarPtr())) {
-              builder.SetCurrentDebugLocation(insn->getDebugLoc());
-              substitute = builder.CreateLoad(
-                  mapData.BasePointers[i]->getType(), mapData.BasePointers[i]);
-              cast<llvm::LoadInst>(substitute)->moveBefore(insn->getIterator());
-            }
-            user->replaceUsesOfWith(mapData.OriginalValue[i], substitute);
-          }
-        }
+    if (!mapData.IsDeclareTarget[i])
+      continue;
+    // If the original map value is a constant, then we have to make sure all
+    // of its uses within the current kernel/function that we are going to
+    // rewrite are converted to instructions, as we will be altering the old
+    // use (OriginalValue) from a constant to an instruction, which will be
+    // illegal and ICE the compiler if the user is a constant expression of
+    // some kind e.g. a constant GEP.
+    if (auto *constant = dyn_cast<llvm::Constant>(mapData.OriginalValue[i]))
+      convertUsersOfConstantsToInstructions(constant, func, false);
+
+    // The users iterator will get invalidated if we modify an element,
+    // so we populate this vector of uses to alter each user on an
+    // individual basis to emit its own load (rather than one load for
+    // all).
+    llvm::SmallVector<llvm::User *> userVec;
+    for (llvm::User *user : mapData.OriginalValue[i]->users())
+      userVec.push_back(user);
+
+    for (llvm::User *user : userVec) {
+      auto *insn = dyn_cast<llvm::Instruction>(user);
+      if (!insn || insn->getFunction() != func)
+        continue;
+      auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
+      llvm::Value *substitute = mapData.BasePointers[i];
+      if (isDeclareTargetLink(mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr()
+                                                   : mapOp.getVarPtr()) ||
+          (isDeclareTargetTo(mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr()
+                                                  : mapOp.getVarPtr()) &&
+           moduleTranslation.getOpenMPBuilder()
+               ->Config.hasRequiresUnifiedSharedMemory())) {
+        builder.SetCurrentDebugLocation(insn->getDebugLoc());
+        substitute = builder.CreateLoad(mapData.BasePointers[i]->getType(),
+                                        mapData.BasePointers[i]);
+        cast<llvm::LoadInst>(substitute)->moveBefore(insn->getIterator());
       }
+      user->replaceUsesOfWith(mapData.OriginalValue[i], substitute);
     }
   }
 }
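
Condensing the rewrite loop above: every in-function use of a declare-target
global is redirected through its reference pointer when the variable is
declare target link, or declare target to under requires
unified_shared_memory. A minimal sketch, where needsRefPtrIndirection stands
in for the isDeclareTargetLink/isDeclareTargetTo condition in the patch:

    llvm::Value *substitute = mapData.BasePointers[i];
    if (needsRefPtrIndirection)
      substitute = builder.CreateLoad(substitute->getType(), substitute);
    user->replaceUsesOfWith(mapData.OriginalValue[i], substitute);
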
@@ -6621,13 +6688,18 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
   }
 
   for (size_t i = 0, e = mapData.OriginalValue.size(); i != e; ++i) {
-    // declare target arguments are not passed to kernels as arguments
+    // 1) Declare target variables are not passed to kernels as arguments
+    // 2) Attach maps are not passed to kernels as arguments
+    // 3) Children of record objects are not passed to kernels as arguments
     // TODO: We currently do not handle cases where a member is explicitly
     // passed in as an argument; this will likely need to be handled in
     // the near future. Rather than using IsAMember, it may be better to
     // test if the relevant BlockArg is used within the target region and
     // then use that as a basis for exclusion in the kernel inputs.
-    if (!mapData.IsDeclareTarget[i] && !mapData.IsAMember[i])
+    bool isAttachMap = (mapData.Types[i] &
+                        llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ATTACH) ==
+                       llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ATTACH;
+    if (!mapData.IsDeclareTarget[i] && !mapData.IsAMember[i] && !isAttachMap)
       kernelInput.push_back(mapData.OriginalValue[i]);
   }
 
diff --git a/mlir/test/Target/LLVMIR/allocatable_gpu_reduction_teams.mlir b/mlir/test/Target/LLVMIR/allocatable_gpu_reduction_teams.mlir
index e0471f6f303fd..afa07c93851df 100644
--- a/mlir/test/Target/LLVMIR/allocatable_gpu_reduction_teams.mlir
+++ b/mlir/test/Target/LLVMIR/allocatable_gpu_reduction_teams.mlir
@@ -45,7 +45,8 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<"dlti.alloca_memory_space" = 5 :
     %5 = llvm.addrspacecast %4 : !llvm.ptr<5> to !llvm.ptr
     %8 = llvm.getelementptr %5[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
     %9 = omp.map.info var_ptr(%5 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%8 : !llvm.ptr, f32) -> !llvm.ptr {name = ""}
-    %10 = omp.map.info var_ptr(%5 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(always, descriptor, to, attach) capture(ByRef) members(%9 : [0] : !llvm.ptr) -> !llvm.ptr {name = "scalar_alloc"}
+    %10 = omp.map.info var_ptr(%5 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(always, descriptor, to) capture(ByRef) members(%9 : [0] : !llvm.ptr) -> !llvm.ptr {name = "scalar_alloc"}
+    %attach = omp.map.info var_ptr(%5 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(ref_ptr_ptee, attach) capture(ByRef) var_ptr_ptr(%8 : !llvm.ptr, f32) -> !llvm.ptr {name = "scalar_alloc"}
     omp.target map_entries(%10 -> %arg0 : !llvm.ptr) {
       %14 = llvm.mlir.constant(1000000 : i32) : i32
       %15 = llvm.mlir.constant(1 : i32) : i32
diff --git a/mlir/test/Target/LLVMIR/omptarget-data-use-dev-ordering.mlir b/mlir/test/Target/LLVMIR/omptarget-data-use-dev-ordering.mlir
index ab59b597846bb..e35a3228ef4f2 100644
--- a/mlir/test/Target/LLVMIR/omptarget-data-use-dev-ordering.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-data-use-dev-ordering.mlir
@@ -15,7 +15,7 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
     %5 = omp.map.info var_ptr(%arg1 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
     %6 = omp.map.info var_ptr(%arg2 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%arg3 : !llvm.ptr, i32) bounds(%3) -> !llvm.ptr
     %7 = omp.map.info var_ptr(%arg2 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) members(%6 : [0] : !llvm.ptr) -> !llvm.ptr
-    %8 = omp.map.info var_ptr(%arg4 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%arg5 : !llvm.ptr, i32) -> !llvm.ptr
+    %8 = omp.map.info var_ptr(%arg4 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%arg5 : !llvm.ptr, f32) -> !llvm.ptr
     %9 = omp.map.info var_ptr(%arg4 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) members(%8 : [0] : !llvm.ptr) -> !llvm.ptr
     %10 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(i64)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
     omp.target_data map_entries(%4, %5 : !llvm.ptr, !llvm.ptr) use_device_addr(%7 -> %arg6, %9 -> %arg7, %6 -> %arg8, %8 -> %arg9 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) use_device_ptr(%10 -> %arg10 : !llvm.ptr) {
@@ -67,12 +67,11 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
 
 // CHECK: define void @mix_use_device_ptr_and_addr_and_map_(ptr %[[ARG_0:.*]], ptr %[[ARG_1:.*]], ptr %[[ARG_2:.*]], ptr %[[ARG_3:.*]], ptr %[[ARG_4:.*]], ptr %[[ARG_5:.*]], ptr %[[ARG_6:.*]], ptr %[[ARG_7:.*]]) {
 // CHECK: %[[ALLOCA:.*]] = alloca ptr, align 8
-// CHECK: %[[BASEPTR_0_GEP:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK: %[[BASEPTR_0_GEP:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
 // CHECK: store ptr %[[ARG_0]], ptr %[[BASEPTR_0_GEP]], align 8
-// CHECK: %[[BASEPTR_2_GEP:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 4
+// CHECK: %[[BASEPTR_2_GEP:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 5
 // CHECK: store ptr %[[ARG_2]], ptr %[[BASEPTR_2_GEP]], align 8
-// CHECK: %[[BASEPTR_3_GEP:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 9
-// CHECK: store ptr %[[ARG_4]], ptr %[[BASEPTR_3_GEP]], align 8
+// CHECK: %[[BASEPTR_3_GEP:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 9
 
 // CHECK: call void @__tgt_target_data_begin_mapper({{.*}})
 // CHECK: %[[LOAD_BASEPTR_0:.*]] = load ptr, ptr %[[BASEPTR_0_GEP]], align 8
@@ -93,11 +92,11 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
 
 // CHECK: define void @mix_use_device_ptr_and_addr_and_map_2(ptr %[[ARG_0:.*]], ptr %[[ARG_1:.*]], ptr %[[ARG_2:.*]], ptr %[[ARG_3:.*]], ptr %[[ARG_4:.*]], ptr %[[ARG_5:.*]], ptr %[[ARG_6:.*]], ptr %[[ARG_7:.*]]) {
 // CHECK: %[[ALLOCA:.*]] = alloca ptr, align 8
-// CHECK: %[[BASEPTR_1_GEP:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK: %[[BASEPTR_1_GEP:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
 // CHECK: store ptr %[[ARG_0]], ptr %[[BASEPTR_1_GEP]], align 8
-// CHECK: %[[BASEPTR_2_GEP:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 4
+// CHECK: %[[BASEPTR_2_GEP:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 5
 // CHECK: store ptr %[[ARG_2]], ptr %[[BASEPTR_2_GEP]], align 8
-// CHECK: %[[BASEPTR_3_GEP:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 9
+// CHECK: %[[BASEPTR_3_GEP:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 9
 // CHECK: store ptr %[[ARG_4]], ptr %[[BASEPTR_3_GEP]], align 8
 // CHECK: call void @__tgt_target_data_begin_mapper({{.*}})
 // CHECK: %[[LOAD_BASEPTR_1:.*]] = load ptr, ptr %[[BASEPTR_1_GEP]], align 8
diff --git a/mlir/test/Target/LLVMIR/omptarget-host-ref-semantics.mlir b/mlir/test/Target/LLVMIR/omptarget-host-ref-semantics.mlir
new file mode 100644
index 0000000000000..aeb42615a6c8c
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-host-ref-semantics.mlir
@@ -0,0 +1,364 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// Tests that we correctly lower the different variations of reference pointer
+// and attach semantics.
+
+module attributes {omp.is_gpu = false, omp.is_target_device = false, omp.requires = #omp<clause_requires none>, omp.target_triples = ["amdgcn-amd-amdhsa"], omp.version = #omp.version<version = 61>} {
+  llvm.func @attach_always_(%arg0: !llvm.ptr, %arg1: !llvm.ptr) {
+    %map1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%arg1 : !llvm.ptr, i32) -> !llvm.ptr {name = ""}
+    %map2 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(always, to) capture(ByRef) members(%map1 : [0] : !llvm.ptr) -> !llvm.ptr {name = "x"}
+    %map3 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(always, attach, ref_ptr_ptee) capture(ByRef) var_ptr_ptr(%arg1 : !llvm.ptr, i32) -> !llvm.ptr {name = "x"}
+    omp.target map_entries(%map2 -> %arg2, %map3 -> %arg3, %map1 -> %arg4 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+      omp.terminator
+    }
+    llvm.return
+  }
+
+  llvm.func @attach_never_(%arg0: !llvm.ptr, %arg1: !llvm.ptr) {
+    %map1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%arg1 : !llvm.ptr, i32) -> !llvm.ptr {name = ""}
+    %map2 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(always, to) capture(ByRef) members(%map1 : [0] : !llvm.ptr) -> !llvm.ptr {name = "x"}
+    omp.target map_entries(%map2 -> %arg2, %map1 -> %arg3 : !llvm.ptr, !llvm.ptr) {
+      omp.terminator
+    }
+    llvm.return
+  }
+
+  llvm.func @attach_auto_(%arg0: !llvm.ptr, %arg1: !llvm.ptr) {
+    %map1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%arg1 : !llvm.ptr, i32) -> !llvm.ptr {name = ""}
+    %map2 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(always, to) capture(ByRef) members(%map1 : [0] : !llvm.ptr) -> !llvm.ptr {name = "x"}
+    %map3 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(attach, ref_ptr_ptee) capture(ByRef) var_ptr_ptr(%arg1 : !llvm.ptr, i32) -> !llvm.ptr {name = "x"}
+    omp.target map_entries(%map2 -> %arg2, %map3 -> %arg3, %map1 -> %arg4 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+      omp.terminator
+    }
+    llvm.return
+  }
+
+  llvm.func @ref_ptr_ptee_(%arg0: !llvm.ptr, %arg1: !llvm.ptr) {
+    %map1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(to, ref_ptr_ptee) capture(ByRef) var_ptr_ptr(%arg1 : !llvm.ptr, i32) -> !llvm.ptr {name = ""}
+    %map2 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(to, ref_ptr_ptee) capture(ByRef) members(%map1 : [0] : !llvm.ptr) -> !llvm.ptr {name = "x"}
+    %map3 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(attach, ref_ptr_ptee) capture(ByRef) var_ptr_ptr(%arg1 : !llvm.ptr, i32) -> !llvm.ptr {name = "x"}
+    omp.target map_entries(%map2 -> %arg2, %map3 -> %arg3, %map1 -> %arg4 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+      omp.terminator
+    }
+    llvm.return
+  }
+
+  llvm.func @ref_ptr_(%arg0: !llvm.ptr, %arg1: !llvm.ptr) {
+    %map1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(to, ref_ptr) capture(ByRef) -> !llvm.ptr {name = ""}
+    %map2 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(attach, ref_ptr) capture(ByRef) var_ptr_ptr(%arg1 : !llvm.ptr, i32) -> !llvm.ptr {name = "x"}
+    omp.target map_entries(%map2 -> %arg2, %map1 -> %arg3 : !llvm.ptr, !llvm.ptr) {
+      omp.terminator
+    }
+    llvm.return
+  }
+
+  llvm.func @ref_ptee_(%arg0: !llvm.ptr, %arg1: !llvm.ptr) {
+    %map1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(to, ref_ptee) capture(ByRef) var_ptr_ptr(%arg1 : !llvm.ptr, i32) -> !llvm.ptr {name = ""}
+    %map2 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(attach, ref_ptee) capture(ByRef) var_ptr_ptr(%arg1 : !llvm.ptr, i32) -> !llvm.ptr {name = "x"}
+    omp.target map_entries(%map2 -> %arg2, %map1 -> %arg3 : !llvm.ptr, !llvm.ptr) {
+      omp.terminator
+    }
+    llvm.return
+  }
+
+  llvm.func @ref_ptr_ptee_attach_never_(%arg0: !llvm.ptr, %arg1: !llvm.ptr) {
+    %map1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(to, ref_ptr_ptee) capture(ByRef) var_ptr_ptr(%arg1 : !llvm.ptr, i32) -> !llvm.ptr {name = ""}
+    %map2 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(to, ref_ptr_ptee) capture(ByRef) members(%map1 : [0] : !llvm.ptr) -> !llvm.ptr {name = "x"}
+    omp.target map_entries(%map2 -> %arg2, %map1 -> %arg3 : !llvm.ptr, !llvm.ptr) {
+      omp.terminator
+    }
+    llvm.return
+  }
+}
+
+// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976710661, i64 281474976710661, i64 3, i64 16388]
+// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710661, i64 281474976710661, i64 3]
+// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976710661, i64 281474976710661, i64 3, i64 16384]
+// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976710657, i64 281474976710657, i64 1, i64 16384]
+// CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [2 x i64] [i64 0, i64 24]
+// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [2 x i64] [i64 16384, i64 33]
+// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [2 x i64] [i64 16384, i64 33]
+// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710657, i64 281474976710657, i64 1]
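+//
+// A decoding of the maptype constants above, assuming the usual tgt
+// map-type bit layout (0x1 TO, 0x2 FROM, 0x4 ALWAYS, 0x20 TARGET_PARAM,
+// 0x4000 ATTACH, top sixteen bits MEMBER_OF): 281474976710661 =
+// (1 << 48) | 0x5 is an ALWAYS|TO member of entry one, 281474976710657 =
+// (1 << 48) | 0x1 a TO member, 32 = TARGET_PARAM, 33 = TO|TARGET_PARAM,
+// 16384 = ATTACH, 16388 = ATTACH|ALWAYS, and 3 = TO|FROM.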
+
+// CHECK: define void @attach_always_(ptr %[[ARG0:.*]], ptr %[[ARG1:.*]])
+// CHECK:  %[[VAL_0:.*]] = load ptr, ptr %[[ARG1]], align 8
+// CHECK:  %[[VAL_1:.*]] = load ptr, ptr %[[ARG1]], align 8
+// CHECK:  %[[VAL_2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[ARG0]], i32 1
+// CHECK:  %[[VAL_3:.*]] = ptrtoint ptr %[[VAL_2]] to i64
+// CHECK:  %[[VAL_4:.*]] = ptrtoint ptr %[[ARG0]] to i64
+// CHECK:  %[[VAL_5:.*]] = sub i64 %[[VAL_3]], %[[VAL_4]]
+// CHECK:  %[[VAL_6:.*]] = sdiv exact i64 %[[VAL_5]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK:  %[[VAL_7:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[ARG0]], i32 1
+// CHECK:  %[[VAL_8:.*]] = ptrtoint ptr %[[ARG1]] to i64
+// CHECK:  %[[VAL_9:.*]] = ptrtoint ptr %[[ARG0]] to i64
+// CHECK:  %[[VAL_10:.*]] = sub i64 %[[VAL_8]], %[[VAL_9]]
+// CHECK:  %[[VAL_11:.*]] = sdiv exact i64 %[[VAL_10]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK:  %[[VAL_12:.*]] = getelementptr ptr, ptr %[[ARG1]], i32 1
+// CHECK:  %[[VAL_13:.*]] = ptrtoint ptr %[[VAL_7]] to i64
+// CHECK:  %[[VAL_14:.*]] = ptrtoint ptr %[[VAL_12]] to i64
+// CHECK:  %[[VAL_15:.*]] = sub i64 %[[VAL_13]], %[[VAL_14]]
+// CHECK:  %[[VAL_16:.*]] = sdiv exact i64 %[[VAL_15]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK:  %[[VAL_17:.*]] = icmp eq ptr %[[VAL_1]], null
+// CHECK:  %[[VAL_18:.*]] = select i1 %[[VAL_17]], i64 0, i64 4
+// CHECK:  %[[VAL_19:.*]] = icmp eq ptr %[[VAL_0]], null
+// CHECK:  %[[VAL_20:.*]] = select i1 %[[VAL_19]], i64 0, i64 24
+// CHECK:  %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK:  store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK:  %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK:  store ptr %[[ARG0]], ptr %[[OFFPTRS]], align 8
+// CHECK:  %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK:  store i64 %[[VAL_6]], ptr %[[SIZES]], align 8
+// CHECK:  %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK:  store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK:  %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK:  store ptr %[[ARG0]], ptr %[[OFFPTRS]], align 8
+// CHECK:  %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 1
+// CHECK:  store i64 %[[VAL_11]], ptr %[[SIZES]], align 8
+// CHECK:  %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
+// CHECK:  store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK:  %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 2
+// CHECK:  store ptr %[[VAL_12]], ptr %[[OFFPTRS]], align 8
+// CHECK:  %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 2
+// CHECK:  store i64 %[[VAL_16]], ptr %[[SIZES]], align 8
+// CHECK:  %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
+// CHECK:  store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK:  %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 3
+// CHECK:  store ptr %[[VAL_1]], ptr %[[OFFPTRS]], align 8
+// CHECK:  %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 3
+// CHECK:  store i64 %[[VAL_18]], ptr %[[SIZES]], align 8
+// CHECK:  %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 4
+// CHECK:  store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK:  %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 4
+// CHECK:  store ptr %[[VAL_0]], ptr %[[OFFPTRS]], align 8
+// CHECK:  %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 4
+// CHECK:  store i64 %[[VAL_20]], ptr %[[SIZES]], align 8
+
+// CHECK: define void @attach_never_(ptr %[[ARG0:.*]], ptr %[[ARG1:.*]])
+// CHECK:  %[[VAL_1:.*]] = load ptr, ptr %[[ARG1]], align 8
+// CHECK:  %[[VAL_2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[ARG0]], i32 1
+// CHECK:  %[[VAL_3:.*]] = ptrtoint ptr %[[VAL_2]] to i64
+// CHECK:  %[[VAL_4:.*]] = ptrtoint ptr %[[ARG0]] to i64
+// CHECK:  %[[VAL_5:.*]] = sub i64 %[[VAL_3]], %[[VAL_4]]
+// CHECK:  %[[VAL_6:.*]] = sdiv exact i64 %[[VAL_5]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK:  %[[VAL_7:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[ARG0]], i32 1
+// CHECK:  %[[VAL_8:.*]] = ptrtoint ptr %[[ARG1]] to i64
+// CHECK:  %[[VAL_9:.*]] = ptrtoint ptr %[[ARG0]] to i64
+// CHECK:  %[[VAL_10:.*]] = sub i64 %[[VAL_8]], %[[VAL_9]]
+// CHECK:  %[[VAL_11:.*]] = sdiv exact i64 %[[VAL_10]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK:  %[[VAL_12:.*]] = getelementptr ptr, ptr %[[ARG1]], i32 1
+// CHECK:  %[[VAL_13:.*]] = ptrtoint ptr %[[VAL_7]] to i64
+// CHECK:  %[[VAL_14:.*]] = ptrtoint ptr %[[VAL_12]] to i64
+// CHECK:  %[[VAL_15:.*]] = sub i64 %[[VAL_13]], %[[VAL_14]]
+// CHECK:  %[[VAL_16:.*]] = sdiv exact i64 %[[VAL_15]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK:  %[[VAL_17:.*]] = icmp eq ptr %[[VAL_1]], null
+// CHECK:  %[[VAL_18:.*]] = select i1 %[[VAL_17]], i64 0, i64 4
+// CHECK:  %[[BASEPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK:  store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK:  %[[OFFPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK:  store ptr %[[ARG0]], ptr %[[OFFPTRS]], align 8
+// CHECK:  %[[SIZES:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK:  store i64 %[[VAL_6]], ptr %[[SIZES]], align 8
+// CHECK:  %[[BASEPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK:  store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK:  %[[OFFPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK:  store ptr %[[ARG0]], ptr %[[OFFPTRS]], align 8
+// CHECK:  %[[SIZES:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 1
+// CHECK:  store i64 %[[VAL_11]], ptr %[[SIZES]], align 8
+// CHECK:  %[[BASEPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
+// CHECK:  store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK:  %[[OFFPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 2
+// CHECK:  store ptr %[[VAL_12]], ptr %[[OFFPTRS]], align 8
+// CHECK:  %[[SIZES:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 2
+// CHECK:  store i64 %[[VAL_16]], ptr %[[SIZES]], align 8
+// CHECK:  %[[BASEPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
+// CHECK:  store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK:  %[[OFFPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 3
+// CHECK:  store ptr %[[VAL_1]], ptr %[[OFFPTRS]], align 8
+// CHECK:  %[[SIZES:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 3
+// CHECK:  store i64 %[[VAL_18]], ptr %[[SIZES]], align 8
+
+// CHECK: define void @attach_auto_(ptr %[[ARG0:.*]], ptr %[[ARG1:.*]])
+// CHECK:  %[[VAL_0:.*]] = load ptr, ptr %[[ARG1]], align 8
+// CHECK:  %[[VAL_1:.*]] = load ptr, ptr %[[ARG1]], align 8
+// CHECK:  %[[VAL_2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[ARG0]], i32 1
+// CHECK:  %[[VAL_3:.*]] = ptrtoint ptr %[[VAL_2]] to i64
+// CHECK:  %[[VAL_4:.*]] = ptrtoint ptr %[[ARG0]] to i64
+// CHECK:  %[[VAL_5:.*]] = sub i64 %[[VAL_3]], %[[VAL_4]]
+// CHECK:  %[[VAL_6:.*]] = sdiv exact i64 %[[VAL_5]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK:  %[[VAL_7:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[ARG0]], i32 1
+// CHECK:  %[[VAL_8:.*]] = ptrtoint ptr %[[ARG1]] to i64
+// CHECK:  %[[VAL_9:.*]] = ptrtoint ptr %[[ARG0]] to i64
+// CHECK:  %[[VAL_10:.*]] = sub i64 %[[VAL_8]], %[[VAL_9]]
+// CHECK:  %[[VAL_11:.*]] = sdiv exact i64 %[[VAL_10]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK:  %[[VAL_12:.*]] = getelementptr ptr, ptr %[[ARG1]], i32 1
+// CHECK:  %[[VAL_13:.*]] = ptrtoint ptr %[[VAL_7]] to i64
+// CHECK:  %[[VAL_14:.*]] = ptrtoint ptr %[[VAL_12]] to i64
+// CHECK:  %[[VAL_15:.*]] = sub i64 %[[VAL_13]], %[[VAL_14]]
+// CHECK:  %[[VAL_16:.*]] = sdiv exact i64 %[[VAL_15]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK:  %[[VAL_17:.*]] = icmp eq ptr %[[VAL_1]], null
+// CHECK:  %[[VAL_18:.*]] = select i1 %[[VAL_17]], i64 0, i64 4
+// CHECK:  %[[VAL_19:.*]] = icmp eq ptr %[[VAL_0]], null
+// CHECK:  %[[VAL_20:.*]] = select i1 %[[VAL_19]], i64 0, i64 24
+// CHECK:  %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK:  store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK:  %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK:  store ptr %[[ARG0]], ptr %[[OFFPTRS]], align 8
+// CHECK:  %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK:  store i64 %[[VAL_6]], ptr %[[SIZES]], align 8
+// CHECK:  %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK:  store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK:  %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK:  store ptr %[[ARG0]], ptr %[[OFFPTRS]], align 8
+// CHECK:  %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 1
+// CHECK:  store i64 %[[VAL_11]], ptr %[[SIZES]], align 8
+// CHECK:  %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
+// CHECK:  store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK:  %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 2
+// CHECK:  store ptr %[[VAL_12]], ptr %[[OFFPTRS]], align 8
+// CHECK:  %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 2
+// CHECK:  store i64 %[[VAL_16]], ptr %[[SIZES]], align 8
+// CHECK:  %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
+// CHECK:  store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK:  %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 3
+// CHECK:  store ptr %[[VAL_1]], ptr %[[OFFPTRS]], align 8
+// CHECK:  %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 3
+// CHECK:  store i64 %[[VAL_18]], ptr %[[SIZES]], align 8
+// CHECK:  %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 4
+// CHECK:  store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK:  %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 4
+// CHECK:  store ptr %[[VAL_0]], ptr %[[OFFPTRS]], align 8
+// CHECK:  %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 4
+// CHECK:  store i64 %[[VAL_20]], ptr %[[SIZES]], align 8
+
+// CHECK: define void @ref_ptr_ptee_(ptr %[[ARG0:.*]], ptr %[[ARG1:.*]])
+// CHECK:  %[[VAL_0:.*]] = load ptr, ptr %[[ARG1]], align 8
+// CHECK:  %[[VAL_1:.*]] = load ptr, ptr %[[ARG1]], align 8
+// CHECK:  %[[VAL_2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[ARG0]], i32 1
+// CHECK:  %[[VAL_3:.*]] = ptrtoint ptr %[[VAL_2]] to i64
+// CHECK:  %[[VAL_4:.*]] = ptrtoint ptr %[[ARG0]] to i64
+// CHECK:  %[[VAL_5:.*]] = sub i64 %[[VAL_3]], %[[VAL_4]]
+// CHECK:  %[[VAL_6:.*]] = sdiv exact i64 %[[VAL_5]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK:  %[[VAL_7:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[ARG0]], i32 1
+// CHECK:  %[[VAL_8:.*]] = ptrtoint ptr %[[ARG1]] to i64
+// CHECK:  %[[VAL_9:.*]] = ptrtoint ptr %[[ARG0]] to i64
+// CHECK:  %[[VAL_10:.*]] = sub i64 %[[VAL_8]], %[[VAL_9]]
+// CHECK:  %[[VAL_11:.*]] = sdiv exact i64 %[[VAL_10]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK:  %[[VAL_12:.*]] = getelementptr ptr, ptr %[[ARG1]], i32 1
+// CHECK:  %[[VAL_13:.*]] = ptrtoint ptr %[[VAL_7]] to i64
+// CHECK:  %[[VAL_14:.*]] = ptrtoint ptr %[[VAL_12]] to i64
+// CHECK:  %[[VAL_15:.*]] = sub i64 %[[VAL_13]], %[[VAL_14]]
+// CHECK:  %[[VAL_16:.*]] = sdiv exact i64 %[[VAL_15]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK:  %[[VAL_17:.*]] = icmp eq ptr %[[VAL_1]], null
+// CHECK:  %[[VAL_18:.*]] = select i1 %[[VAL_17]], i64 0, i64 4
+// CHECK:  %[[VAL_19:.*]] = icmp eq ptr %[[VAL_0]], null
+// CHECK:  %[[VAL_20:.*]] = select i1 %[[VAL_19]], i64 0, i64 24
+// CHECK:  %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK:  store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK:  %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK:  store ptr %[[ARG0]], ptr %[[OFFPTRS]], align 8
+// CHECK:  %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK:  store i64 %[[VAL_6]], ptr %[[SIZES]], align 8
+// CHECK:  %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK:  store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK:  %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK:  store ptr %[[ARG0]], ptr %[[OFFPTRS]], align 8
+// CHECK:  %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 1
+// CHECK:  store i64 %[[VAL_11]], ptr %[[SIZES]], align 8
+// CHECK:  %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
+// CHECK:  store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK:  %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 2
+// CHECK:  store ptr %[[VAL_12]], ptr %[[OFFPTRS]], align 8
+// CHECK:  %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 2
+// CHECK:  store i64 %[[VAL_16]], ptr %[[SIZES]], align 8
+// CHECK:  %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
+// CHECK:  store ptr %[[ARG1]], ptr %[[BASEPTRS]], align 8
+// CHECK:  %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 3
+// CHECK:  store ptr %[[VAL_1]], ptr %[[OFFPTRS]], align 8
+// CHECK:  %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 3
+// CHECK:  store i64 %[[VAL_18]], ptr %[[SIZES]], align 8
+// CHECK:  %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 4
+// CHECK:  store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK:  %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 4
+// CHECK:  store ptr %[[VAL_0]], ptr %[[OFFPTRS]], align 8
+// CHECK:  %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 4
+// CHECK:  store i64 %[[VAL_20]], ptr %[[SIZES]], align 8
+
+// CHECK: define void @ref_ptr_(ptr %[[ARG0:.*]], ptr %[[ARG1:.*]])
+// CHECK:  %[[VAL_0:.*]] = load ptr, ptr %[[ARG1]], align 8
+// CHECK:  %[[VAL_1:.*]] = icmp eq ptr %[[VAL_0]], null
+// CHECK:  %[[VAL_2:.*]] = select i1 %[[VAL_1]], i64 0, i64 24
+// CHECK:  %[[BASEPTRS:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK:  store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK:  %[[OFFPTRS:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK:  store ptr %[[VAL_0]], ptr %[[OFFPTRS]], align 8
+// CHECK:  %[[SIZES:.*]] = getelementptr inbounds [2 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK:  store i64 %[[VAL_2]], ptr %[[SIZES]], align 8
+// CHECK:  %[[BASEPTRS:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK:  store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK:  %[[OFFPTRS:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK:  store ptr %[[ARG0]], ptr %[[OFFPTRS]], align 8
+
+// CHECK: define void @ref_ptee_(ptr %[[ARG0:.*]], ptr %[[ARG1:.*]])
+// CHECK: %[[VAL_0:.*]] = load ptr, ptr %[[ARG1]], align 8
+// CHECK: %[[VAL_1:.*]] = load ptr, ptr %[[ARG1]], align 8
+// CHECK: %[[VAL_2:.*]] = icmp eq ptr %[[VAL_0]], null
+// CHECK: %[[VAL_3:.*]] = select i1 %[[VAL_2]], i64 0, i64 24
+// CHECK: %[[VAL_4:.*]] = icmp eq ptr %[[VAL_1]], null
+// CHECK: %[[VAL_5:.*]] = select i1 %[[VAL_4]], i64 0, i64 4
+// CHECK:  %[[BASEPTRS:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK:  store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK:  %[[OFFPTRS:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK:  store ptr %[[VAL_0]], ptr %[[OFFPTRS]], align 8
+// CHECK:  %[[SIZES:.*]] = getelementptr inbounds [2 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK:  store i64 %[[VAL_3]], ptr %[[SIZES]], align 8
+// CHECK:  %[[BASEPTRS:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK:  store ptr %[[ARG1]], ptr %[[BASEPTRS]], align 8
+// CHECK:  %[[OFFPTRS:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK:  store ptr %[[VAL_1]], ptr %[[OFFPTRS]], align 8
+// CHECK:  %[[SIZES:.*]] = getelementptr inbounds [2 x i64], ptr %.offload_sizes, i32 0, i32 1
+// CHECK:  store i64 %[[VAL_5]], ptr %[[SIZES]], align 8
+
+// CHECK: define void @ref_ptr_ptee_attach_never_(ptr %[[ARG0:.*]], ptr %[[ARG1:.*]])
+// CHECK:  %[[VAL_1:.*]] = load ptr, ptr %[[ARG1]], align 8
+// CHECK:  %[[VAL_2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[ARG0]], i32 1
+// CHECK:  %[[VAL_3:.*]] = ptrtoint ptr %[[VAL_2]] to i64
+// CHECK:  %[[VAL_4:.*]] = ptrtoint ptr %[[ARG0]] to i64
+// CHECK:  %[[VAL_5:.*]] = sub i64 %[[VAL_3]], %[[VAL_4]]
+// CHECK:  %[[VAL_6:.*]] = sdiv exact i64 %[[VAL_5]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK:  %[[VAL_7:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[ARG0]], i32 1
+// CHECK:  %[[VAL_8:.*]] = ptrtoint ptr %[[ARG1]] to i64
+// CHECK:  %[[VAL_9:.*]] = ptrtoint ptr %[[ARG0]] to i64
+// CHECK:  %[[VAL_10:.*]] = sub i64 %[[VAL_8]], %[[VAL_9]]
+// CHECK:  %[[VAL_11:.*]] = sdiv exact i64 %[[VAL_10]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK:  %[[VAL_12:.*]] = getelementptr ptr, ptr %[[ARG1]], i32 1
+// CHECK:  %[[VAL_13:.*]] = ptrtoint ptr %[[VAL_7]] to i64
+// CHECK:  %[[VAL_14:.*]] = ptrtoint ptr %[[VAL_12]] to i64
+// CHECK:  %[[VAL_15:.*]] = sub i64 %[[VAL_13]], %[[VAL_14]]
+// CHECK:  %[[VAL_16:.*]] = sdiv exact i64 %[[VAL_15]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK:  %[[VAL_17:.*]] = icmp eq ptr %[[VAL_1]], null
+// CHECK:  %[[VAL_18:.*]] = select i1 %[[VAL_17]], i64 0, i64 4
+// CHECK:  %[[BASEPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK:  store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK:  %[[OFFPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK:  store ptr %[[ARG0]], ptr %[[OFFPTRS]], align 8
+// CHECK:  %[[SIZES:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK:  store i64 %[[VAL_6]], ptr %[[SIZES]], align 8
+// CHECK:  %[[BASEPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK:  store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK:  %[[OFFPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK:  store ptr %[[ARG0]], ptr %[[OFFPTRS]], align 8
+// CHECK:  %[[SIZES:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 1
+// CHECK:  store i64 %[[VAL_11]], ptr %[[SIZES]], align 8
+// CHECK:  %[[BASEPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
+// CHECK:  store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK:  %[[OFFPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 2
+// CHECK:  store ptr %[[VAL_12]], ptr %[[OFFPTRS]], align 8
+// CHECK:  %[[SIZES:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 2
+// CHECK:  store i64 %[[VAL_16]], ptr %[[SIZES]], align 8
+// CHECK:  %[[BASEPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
+// CHECK:  store ptr %[[ARG1]], ptr %[[BASEPTRS]], align 8
+// CHECK:  %[[OFFPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 3
+// CHECK:  store ptr %[[VAL_1]], ptr %[[OFFPTRS]], align 8
+// CHECK:  %[[SIZES:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 3
+// CHECK:  store i64 %[[VAL_18]], ptr %[[SIZES]], align 8
diff --git a/mlir/test/Target/LLVMIR/omptarget-nested-ptr-record-type-mapping-host.mlir b/mlir/test/Target/LLVMIR/omptarget-nested-ptr-record-type-mapping-host.mlir
index 4912cf34072fb..7b3eb5ce24257 100644
--- a/mlir/test/Target/LLVMIR/omptarget-nested-ptr-record-type-mapping-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-nested-ptr-record-type-mapping-host.mlir
@@ -27,8 +27,8 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
   }
 }
 
-// CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [4 x i64] [i64 0, i64 48, i64 8, i64 0]
-// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710659, i64 281474976710659, i64 281474976710675]
+// CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [3 x i64] [i64 0, i64 48, i64 0]
+// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [3 x i64] [i64 32, i64 281474976710659, i64 3]
 
 // CHECK: define void @omp_nested_derived_type_alloca_map(ptr %[[ARG:.*]]) {
 
@@ -43,24 +43,19 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
 // CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_4:.*]] = sub i64 %[[DTYPE_SIZE_SEGMENT_CALC_2]], %[[DTYPE_SIZE_SEGMENT_CALC_3]]
 // CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_5:.*]] = sdiv exact i64 %[[DTYPE_SIZE_SEGMENT_CALC_4]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
 
-// CHECK:  %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK:  %[[BASE_PTRS:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
 // CHECK:  store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8
-// CHECK:  %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK:  %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_ptrs, i32 0, i32 0
 // CHECK:  store ptr %[[NESTED_STRUCT_PTR_MEMBER_GEP]], ptr %[[OFFLOAD_PTRS]], align 8
-// CHECK:  %[[OFFLOAD_SIZES:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK:  %[[OFFLOAD_SIZES:.*]] = getelementptr inbounds [3 x i64], ptr %.offload_sizes, i32 0, i32 0
 // CHECK:  store i64 %[[DTYPE_SIZE_SEGMENT_CALC_5]], ptr %[[OFFLOAD_SIZES]], align 8
 
-// CHECK:  %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK:  %[[BASE_PTRS:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
 // CHECK:  store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8
-// CHECK:  %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK:  %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_ptrs, i32 0, i32 1
 // CHECK:  store ptr %[[NESTED_STRUCT_PTR_MEMBER_GEP]], ptr %[[OFFLOAD_PTRS]], align 8
 
-// CHECK:  %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
+// CHECK:  %[[BASE_PTRS:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
 // CHECK:  store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8
-// CHECK:  %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 2
-// CHECK:  store ptr %[[NESTED_STRUCT_PTR_MEMBER_BADDR_GEP]], ptr %[[OFFLOAD_PTRS]], align 8
-
-// CHECK:  %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
-// CHECK:  store ptr %[[NESTED_STRUCT_PTR_MEMBER_BADDR_GEP]], ptr %[[BASE_PTRS]], align 8
-// CHECK:  %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 3
+// CHECK:  %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_ptrs, i32 0, i32 2
 // CHECK:  store ptr %[[ARR_OFFSET]], ptr %[[OFFLOAD_PTRS]], align 8
diff --git a/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir b/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir
index 8837b42f70a44..b1eac23230a50 100644
--- a/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir
@@ -41,14 +41,14 @@ llvm.func @_QQmain() {
 // CHECK: %[[LAST_MEMBER:.*]] = getelementptr inbounds [10 x i32], ptr %[[MEMBER_ACCESS_3]], i64 0, i64 1
 // CHECK: %[[FIRST_MEMBER:.*]] = getelementptr i32, ptr %[[MEMBER_ACCESS_1]], i64 1
 // CHECK: %[[FIRST_MEMBER_OFF:.*]] = ptrtoint ptr %[[FIRST_MEMBER]] to i64
-// CHECK: %[[SECOND_MEMBER_OFF:.*]] = ptrtoint ptr %[[LAST_MEMBER]] to i64
+// CHECK: %[[SECOND_MEMBER_OFF:.*]] = ptrtoint ptr %[[MEMBER_ACCESS_3]] to i64
 // CHECK: %[[MEMBER_DIFF:.*]] = sub i64 %[[FIRST_MEMBER_OFF]], %[[SECOND_MEMBER_OFF]]
 // CHECK: %[[OFFLOAD_SIZE:.*]] = sdiv exact i64 %[[MEMBER_DIFF]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
 
 // CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
 // CHECK: store ptr %[[ALLOCA]], ptr %[[BASE_PTR_ARR]], align 8
 // CHECK: %[[PTR_ARR:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 0
-// CHECK: store ptr %[[LAST_MEMBER]], ptr %[[PTR_ARR]], align 8
+// CHECK: store ptr %[[MEMBER_ACCESS_3]], ptr %[[PTR_ARR]], align 8
 // CHECK: %[[SIZE_ARR:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 0
 // CHECK: store i64 %[[OFFLOAD_SIZE]], ptr %[[SIZE_ARR]], align 8
 
diff --git a/mlir/test/Target/LLVMIR/omptarget-nowait.mlir b/mlir/test/Target/LLVMIR/omptarget-nowait.mlir
index 39b2f37a031a8..f61c03e5a53e9 100644
--- a/mlir/test/Target/LLVMIR/omptarget-nowait.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-nowait.mlir
@@ -25,33 +25,33 @@ module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} {
 
 // CHECK: %struct.[[TSK_WTH_PRVTS:.*]] = type { %struct.kmp_task_ompbuilder_t, %struct.[[PRVTS:.*]] }
 // CHECK: %struct.kmp_task_ompbuilder_t = type { ptr, ptr, i32, ptr, ptr }
-// CHECK: %struct.[[PRVTS]] = type { [6 x ptr], [6 x ptr], [6 x i64] }
+// CHECK: %struct.[[PRVTS]] = type { [5 x ptr], [5 x ptr], [5 x i64] }
 
 // CHECK: define void @launch_(ptr captures(none) %0)
 // CHECK: %[[STRUCTARG:.*]] = alloca { ptr, ptr }, align 8
-// CHECK: %[[BASEPTRS:.*]] = alloca [6 x ptr], align 8
-// CHECK: %[[PTRS:.*]] = alloca [6 x ptr], align 8
-// CHECK: %[[MAPPERS:.*]] = alloca [6 x ptr], align 8
-// CHECK: %[[SIZES:.*]] = alloca [6 x i64], align 4
+// CHECK: %[[BASEPTRS:.*]] = alloca [5 x ptr], align 8
+// CHECK: %[[PTRS:.*]] = alloca [5 x ptr], align 8
+// CHECK: %[[MAPPERS:.*]] = alloca [5 x ptr], align 8
+// CHECK: %[[SIZES:.*]] = alloca [5 x i64], align 4
 
-// CHECK: %[[VAL_20:.*]] = getelementptr inbounds [6 x ptr], ptr %[[BASEPTRS]], i32 0, i32 0
-// CHECK: %[[BASEPTRS_GEP:.*]] = getelementptr inbounds [6 x ptr], ptr %[[BASEPTRS]], i32 0, i32 0
-// CHECK: %[[PTRS_GEP:.*]] = getelementptr inbounds [6 x ptr], ptr %[[PTRS]], i32 0, i32 0
-// CHECK: %[[SIZES_GEP:.*]] = getelementptr inbounds [6 x i64], ptr %[[SIZES]], i32 0, i32 0
+// CHECK: %[[VAL_20:.*]] = getelementptr inbounds [5 x ptr], ptr %[[BASEPTRS]], i32 0, i32 0
+// CHECK: %[[BASEPTRS_GEP:.*]] = getelementptr inbounds [5 x ptr], ptr %[[BASEPTRS]], i32 0, i32 0
+// CHECK: %[[PTRS_GEP:.*]] = getelementptr inbounds [5 x ptr], ptr %[[PTRS]], i32 0, i32 0
+// CHECK: %[[SIZES_GEP:.*]] = getelementptr inbounds [5 x i64], ptr %[[SIZES]], i32 0, i32 0
 
 // CHECK: %[[GL_THRD_NUM:.*]] = call i32 @__kmpc_global_thread_num
-// CHECK: %[[TASK_DESC:.*]] = call ptr @__kmpc_omp_target_task_alloc(ptr @4, i32 {{.*}}, i32 0, i64 184, i64 16, ptr [[TGT_TSK_PRXY_FNC:.*]], i64 -1)
+// CHECK: %[[TASK_DESC:.*]] = call ptr @__kmpc_omp_target_task_alloc(ptr @4, i32 {{.*}}, i32 0, i64 160, i64 16, ptr [[TGT_TSK_PRXY_FNC:.*]], i64 -1)
 // CHECK: %[[TSK_PTR:.*]] = getelementptr inbounds nuw %struct.[[TSK_WTH_PRVTS]], ptr %[[TASK_DESC]], i32 0, i32 0
 // CHECK: %[[SHAREDS:.*]] = getelementptr inbounds nuw %struct.kmp_task_ompbuilder_t, ptr %[[TSK_PTR]], i32 0, i32 0
 // CHECK: %[[SHAREDS_PTR:.*]] = load ptr, ptr %[[SHAREDS]], align 8
 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[SHAREDS_PTR]], ptr align 1 %[[STRUCTARG]], i64 16, i1 false)
 // CHECK: %[[VAL_50:.*]] = getelementptr inbounds nuw %struct.[[TSK_WTH_PRVTS]], ptr %[[TASK_DESC]], i32 0, i32 1
 // CHECK: %[[VAL_51:.*]] = getelementptr inbounds nuw %struct.[[PRVTS]], ptr %[[VAL_50]], i32 0, i32 0
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_51]], ptr align 1 %[[BASEPTRS_GEP]], i64 48, i1 false)
+// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_51]], ptr align 1 %[[BASEPTRS_GEP]], i64 40, i1 false)
 // CHECK: %[[VAL_53:.*]] = getelementptr inbounds nuw %struct.[[PRVTS]], ptr %[[VAL_50]], i32 0, i32 1
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_53]], ptr align 1 %[[PTRS_GEP]], i64 48, i1 false)
+// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_53]], ptr align 1 %[[PTRS_GEP]], i64 40, i1 false)
 // CHECK: %[[VAL_54:.*]] = getelementptr inbounds nuw %struct.[[PRVTS]], ptr %[[VAL_50]], i32 0, i32 2
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_54]], ptr align 1 %[[SIZES_GEP]], i64 48, i1 false)
+// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_54]], ptr align 1 %[[SIZES_GEP]], i64 40, i1 false)
 // CHECK: %[[VAL_55:.*]] = call i32 @__kmpc_omp_task(ptr @4, i32 %[[GL_THRD_NUM]], ptr %[[TASK_DESC]])
 
 // CHECK: define internal void @[[WORKER:.*]](i32 {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}) {
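A quick sanity check on the task-allocation size change above (184 -> 160): the delta is exactly one fewer map entry across the three private arrays. The sketch below is illustrative arithmetic only, assuming an LP64 target where pointers and i64 are 8 bytes and the i32 field of kmp_task_ompbuilder_t is padded to 8; the names are invented for the check, not taken from the runtime.

#include <cstdint>
// kmp_task_ompbuilder_t = {ptr, ptr, i32(+pad), ptr, ptr} -> 40 bytes on LP64.
constexpr uint64_t TaskHeaderBytes = 5 * 8;
// Privates = [N x ptr] + [N x ptr] + [N x i64], i.e. three N-element arrays.
constexpr uint64_t privatesBytes(uint64_t NumMapEntries) {
  return 3 * NumMapEntries * 8;
}
static_assert(TaskHeaderBytes + privatesBytes(6) == 184, "old size, 6 entries");
static_assert(TaskHeaderBytes + privatesBytes(5) == 160, "new size, 5 entries");

The 48 -> 40 byte memcpy changes in the same hunk are the matching per-array shrink (N * 8 with N going from 6 to 5).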
diff --git a/mlir/test/Target/LLVMIR/omptarget-record-type-mapping-host.mlir b/mlir/test/Target/LLVMIR/omptarget-record-type-mapping-host.mlir
index e19b96bb2d732..c5b966f6dc817 100644
--- a/mlir/test/Target/LLVMIR/omptarget-record-type-mapping-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-record-type-mapping-host.mlir
@@ -39,14 +39,14 @@ llvm.func @_QQmain() {
 // CHECK: %[[LAST_MEMBER:.*]] = getelementptr inbounds [10 x i32], ptr %[[MEMBER_ACCESS_2]], i64 0, i64 1
 // CHECK: %[[FIRST_MEMBER:.*]] = getelementptr i32, ptr %[[MEMBER_ACCESS_1]], i64 1
 // CHECK: %[[FIRST_MEMBER_OFF:.*]] = ptrtoint ptr %[[FIRST_MEMBER]] to i64
-// CHECK: %[[SECOND_MEMBER_OFF:.*]] = ptrtoint ptr %[[LAST_MEMBER]] to i64
+// CHECK: %[[SECOND_MEMBER_OFF:.*]] = ptrtoint ptr %[[MEMBER_ACCESS_2]] to i64
 // CHECK: %[[MEMBER_DIFF:.*]] = sub i64 %[[FIRST_MEMBER_OFF]], %[[SECOND_MEMBER_OFF]]
 // CHECK: %[[OFFLOAD_SIZE:.*]] = sdiv exact i64 %[[MEMBER_DIFF]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
 
 // CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
 // CHECK: store ptr %[[ALLOCA]], ptr %[[BASE_PTR_ARR]], align 8
 // CHECK: %[[PTR_ARR:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_ptrs, i32 0, i32 0
-// CHECK: store ptr %[[LAST_MEMBER]], ptr %[[PTR_ARR]], align 8
+// CHECK: store ptr %[[MEMBER_ACCESS_2]], ptr %[[PTR_ARR]], align 8
 // CHECK: %[[SIZE_ARR:.*]] = getelementptr inbounds [3 x i64], ptr %.offload_sizes, i32 0, i32 0
 // CHECK: store i64 %[[OFFLOAD_SIZE]], ptr %[[SIZE_ARR]], align 8
 
diff --git a/mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir b/mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir
index 725b7c169c2df..0f1cfd6f7d74f 100644
--- a/mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir
@@ -59,9 +59,7 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
 
 // CHECK: @[[FULL_ARR_GLOB:.*]] = internal global { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } undef
 // CHECK: @[[ARR_SECT_GLOB:.*]] = internal global { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } undef
-// CHECK: @.offload_sizes = private unnamed_addr constant [15 x i64] [i64 0, i64 0, i64 0, i64 8, i64 0, i64 0, i64 0, i64 0, i64 8, i64 0, i64 0, i64 0, i64 0, i64 8, i64 0]
-// CHECK: @.offload_maptypes = private unnamed_addr constant [15 x i64] [i64 32, i64 281474976710659, i64 281474976710659, i64 281474976710659, i64 281474976710675, i64 32, i64 1688849860263939, i64 1688849860263939, i64 1688849860263939, i64 1688849860263955, i64 32, i64 3096224743817219, i64 3096224743817219, i64 3096224743817219, i64 3096224743817235]
-// CHECK: @.offload_mapnames = private constant [15 x ptr] [ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}]
+// CHECK: @.offload_maptypes = private unnamed_addr constant [12 x i64] [i64 32, i64 281474976710659, i64 281474976710659, i64 3, i64 32, i64 1407374883553283, i64 1407374883553283, i64 3, i64 32, i64 2533274790395907, i64 2533274790395907, i64 3]
 
 // CHECK: define void @main()
 // CHECK: %[[SCALAR_ALLOCA:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, i64 1, align 8
@@ -103,79 +101,65 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
 // CHECK: %[[SCALAR_BASE_OFF_SZ2:.*]] = ptrtoint ptr %[[SCALAR_BASE_OFF]] to i64
 // CHECK: %[[SCALAR_BASE_OFF_SZ3:.*]] = sub i64 %[[SCALAR_BASE_OFF_SZ1]], %[[SCALAR_BASE_OFF_SZ2]]
 // CHECK: %[[SCALAR_BASE_OFF_SZ4:.*]] = sdiv exact i64 %[[SCALAR_BASE_OFF_SZ3]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
 // CHECK: store ptr @full_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 0
 // CHECK: store ptr @full_arr, ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [15 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [12 x i64], ptr %.offload_sizes, i32 0, i32 0
 // CHECK: store i64 %[[FULL_ARR_DESC_SIZE]], ptr %[[OFFLOADSIZES]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
 // CHECK: store ptr @full_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 1
 // CHECK: store ptr @full_arr, ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
 // CHECK: store ptr @full_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_ptrs, i32 0, i32 2
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 2
 // CHECK: store ptr getelementptr inbounds nuw (i8, ptr @full_arr, i64 8), ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [15 x i64], ptr %.offload_sizes, i32 0, i32 2
+// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [12 x i64], ptr %.offload_sizes, i32 0, i32 2
 // CHECK: store i64 %[[FULL_ARR_SZ]], ptr %[[OFFLOADSIZES]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
 // CHECK: store ptr @full_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_ptrs, i32 0, i32 3
-// CHECK: store ptr @full_arr, ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_baseptrs, i32 0, i32 4
-// CHECK: store ptr @full_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_ptrs, i32 0, i32 4
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 3
 // CHECK: store ptr %[[FULL_ARR_PTR]], ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [15 x i64], ptr %.offload_sizes, i32 0, i32 4
-// CHECK: store i64 %[[IS_NULL]], ptr %[[OFFLOADSIZES]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_baseptrs, i32 0, i32 5
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 4
 // CHECK: store ptr @sect_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_ptrs, i32 0, i32 5
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 4
 // CHECK: store ptr @sect_arr, ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [15 x i64], ptr %.offload_sizes, i32 0, i32 5
+// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [12 x i64], ptr %.offload_sizes, i32 0, i32 4
 // CHECK: store i64 %[[ARR_SECT_DESC_SIZE]], ptr %[[OFFLOADSIZES]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_baseptrs, i32 0, i32 6
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 5
 // CHECK: store ptr @sect_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_ptrs, i32 0, i32 6
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 5
 // CHECK: store ptr @sect_arr, ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_baseptrs, i32 0, i32 7
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 6
 // CHECK: store ptr @sect_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_ptrs, i32 0, i32 7
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 6
 // CHECK: store ptr getelementptr inbounds nuw (i8, ptr @sect_arr, i64 8), ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [15 x i64], ptr %.offload_sizes, i32 0, i32 7
+// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [12 x i64], ptr %.offload_sizes, i32 0, i32 6
 // CHECK: store i64 %[[ARR_SECT_SZ]], ptr %[[OFFLOADSIZES]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_baseptrs, i32 0, i32 8
-// CHECK: store ptr @sect_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_ptrs, i32 0, i32 8
-// CHECK: store ptr @sect_arr, ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_baseptrs, i32 0, i32 9
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 7
 // CHECK: store ptr @sect_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_ptrs, i32 0, i32 9
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 7
 // CHECK: store ptr %[[ARR_SECT_PTR]], ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [15 x i64], ptr %.offload_sizes, i32 0, i32 9
+// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [12 x i64], ptr %.offload_sizes, i32 0, i32 7
 // CHECK: store i64 %[[IS_NULL2]], ptr %[[OFFLOADSIZES]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_baseptrs, i32 0, i32 10
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 8
 // CHECK: store ptr %[[SCALAR_ALLOCA]], ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_ptrs, i32 0, i32 10
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 8
 // CHECK: store ptr %[[SCALAR_ALLOCA]], ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [15 x i64], ptr %.offload_sizes, i32 0, i32 10
+// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [12 x i64], ptr %.offload_sizes, i32 0, i32 8
 // CHECK: store i64 %[[SCALAR_DESC_SZ]], ptr %[[OFFLOADSIZES]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_baseptrs, i32 0, i32 11
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 9
 // CHECK: store ptr %[[SCALAR_ALLOCA]], ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_ptrs, i32 0, i32 11
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 9
 // CHECK: store ptr %[[SCALAR_ALLOCA]], ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_baseptrs, i32 0, i32 12
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 10
 // CHECK: store ptr %[[SCALAR_ALLOCA]], ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_ptrs, i32 0, i32 12
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 10
 // CHECK: store ptr %[[SCALAR_BASE_OFF]], ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [15 x i64], ptr %.offload_sizes, i32 0, i32 12
+// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [12 x i64], ptr %.offload_sizes, i32 0, i32 10
 // CHECK: store i64 %[[SCALAR_BASE_OFF_SZ4]], ptr %[[OFFLOADSIZES]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_baseptrs, i32 0, i32 13
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 11
 // CHECK: store ptr %[[SCALAR_ALLOCA]], ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_ptrs, i32 0, i32 13
-// CHECK: store ptr %[[SCALAR_BASE]], ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_baseptrs, i32 0, i32 14
-// CHECK: store ptr %[[SCALAR_BASE]], ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_ptrs, i32 0, i32 14
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 11
 // CHECK: store ptr %[[SCALAR_PTR_LOAD]], ptr %[[OFFLOADPTRS]], align 8
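For anyone decoding the updated @.offload_maptypes constant earlier in this file's diff: the values are libomptarget map-type bitmasks. The sketch below checks them under the usual bit assignments (TO = 0x1, FROM = 0x2, TARGET_PARAM = 0x20, MEMBER_OF occupying bits 48-63 with the 1-based index of the parent entry); reading the plain `3` entries as the new attach-style pointee maps is our interpretation of this PR, not a documented constant.

#include <cstdint>
constexpr uint64_t MapTo = 0x1, MapFrom = 0x2, MapTargetParam = 0x20;
constexpr uint64_t memberOf(uint64_t ParentIdx1Based) {
  return ParentIdx1Based << 48;
}
// Each descriptor group opens with a plain TARGET_PARAM entry (i64 32).
static_assert(MapTargetParam == 32, "group parents");
// Member entries reference their parents at (0-based) positions 0, 4 and 8.
static_assert((memberOf(1) | MapTo | MapFrom) == 281474976710659ULL, "grp 1");
static_assert((memberOf(5) | MapTo | MapFrom) == 1407374883553283ULL, "grp 2");
static_assert((memberOf(9) | MapTo | MapFrom) == 2533274790395907ULL, "grp 3");
// The trailing i64 3 entries are plain TO | FROM, with no MEMBER_OF bits.
static_assert((MapTo | MapFrom) == 3, "pointee entries");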
diff --git a/offload/test/offloading/fortran/descriptor-stack-jam-regression.f90 b/offload/test/offloading/fortran/descriptor-stack-jam-regression.f90
index 45a18b7f38ed3..5e33770648e34 100644
--- a/offload/test/offloading/fortran/descriptor-stack-jam-regression.f90
+++ b/offload/test/offloading/fortran/descriptor-stack-jam-regression.f90
@@ -5,7 +5,8 @@
 ! device.
 ! REQUIRES: flang, amdgpu
 
-! RUN: %libomptarget-compile-fortran-run-and-check-generic
+! RUN: %libomptarget-compile-fortran-generic
+! RUN: env LIBOMPTARGET_TREAT_ATTACH_AUTO_AS_ALWAYS=1 %libomptarget-run-generic 2>&1 | %fcheck-generic
 module test
 contains
     subroutine kernel_1d(array)
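Note the RUN-line pattern introduced here and reused by the new tests below: the compile and run steps are split so the LIBOMPTARGET_TREAT_ATTACH_AUTO_AS_ALWAYS toggle can be pinned explicitly per test. As a hedged illustration of what such a switch amounts to inside a runtime (the function name and the unset-handling below are invented for illustration; the actual libomptarget code may differ):

#include <cstdlib>
#include <cstring>
// Illustrative only: reads the toggle the RUN lines set. The real runtime's
// entry point and its default when the variable is unset may differ.
static bool treatAttachAutoAsAlways() {
  const char *Env = std::getenv("LIBOMPTARGET_TREAT_ATTACH_AUTO_AS_ALWAYS");
  return Env != nullptr && std::strcmp(Env, "0") != 0;
}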
diff --git a/offload/test/offloading/fortran/map_attach_always.f90 b/offload/test/offloading/fortran/map_attach_always.f90
new file mode 100644
index 0000000000000..f5c7c5e6df921
--- /dev/null
+++ b/offload/test/offloading/fortran/map_attach_always.f90
@@ -0,0 +1,70 @@
+
+! This checks that attach(always) forces re-attachment of the pointer.
+! NOTE: We have to make sure the old default auto-attach behaviour is off to
+! yield the correct results for this test; otherwise the second target region
+! would behave as if attach(always) had been specified.
+! REQUIRES: flang, amdgpu
+
+! RUN: %libomptarget-compile-fortran-generic
+! RUN: env LIBOMPTARGET_TREAT_ATTACH_AUTO_AS_ALWAYS=0 %libomptarget-run-generic 2>&1 | %fcheck-generic
+program main
+    implicit none
+    integer, pointer :: map_ptr(:)
+    integer, target :: a(10)
+    integer, target :: b(10)
+    integer :: index, n
+    logical :: correct
+
+    n = 10
+    correct = .true.
+
+    do index = 1, n
+        a(index) = 10
+        b(index) = 20
+    end do
+
+    map_ptr => a
+
+    ! This should map a, b, and map_ptr to the device and attach map_ptr
+    ! to a (since map_ptr was pointer-assigned to a above); b is also
+    ! mapped here so it is already present when the target regions run.
+    !$omp target enter data map(ref_ptr_ptee, to: map_ptr)
+    !$omp target enter data map(to: b, a)
+
+    !$omp target map(to: index) map(tofrom: correct)
+        do index = 1, n
+            if (map_ptr(index) /= 10) then
+                correct = .false.
+            endif
+        end do
+    !$omp end target
+
+    map_ptr => b
+
+    ! There is no attach(always) to force re-attachment, so map_ptr
+    ! should still be attached to "a".
+    !$omp target map(to: index) map(tofrom: correct)
+        do index = 1, n
+            if (map_ptr(index) /= 10) then
+                correct = .false.
+            endif
+        end do
+    !$omp end target
+
+    !$omp target map(to: index) map(attach(always): map_ptr) map(tofrom: correct)
+        do index = 1, n
+            if (map_ptr(index) /= 20) then
+                correct = .false.
+            endif
+        end do
+    !$omp end target
+
+    if (correct .NEQV. .true.) then
+        print*, "Failed!"
+        stop 1
+    endif
+
+    print*, "Passed!"
+end program
+
+!CHECK: Passed!
diff --git a/offload/test/offloading/fortran/map_attach_never.f90 b/offload/test/offloading/fortran/map_attach_never.f90
new file mode 100644
index 0000000000000..5b4d2dc5c6cd8
--- /dev/null
+++ b/offload/test/offloading/fortran/map_attach_never.f90
@@ -0,0 +1,55 @@
+! This checks that attach(never) prevents pointer attachment when specified.
+! NOTE: We have to make sure the old default auto-attach behaviour is off to
+! yield the correct results for this test; otherwise the target region below
+! would behave as if attach(always) had been specified.
+! REQUIRES: flang, amdgpu
+
+! RUN: %libomptarget-compile-fortran-generic
+! RUN: env LIBOMPTARGET_TREAT_ATTACH_AUTO_AS_ALWAYS=0 %libomptarget-run-generic 2>&1 | %fcheck-generic
+program main
+    implicit none
+    integer, pointer :: map_ptr(:)
+    integer, target :: a(10)
+    integer, target :: b(10)
+    integer :: index, n
+    logical :: correct
+
+    correct = .true.
+    n = 10
+
+    do index = 1, n
+        a(index) = 10
+        b(index) = 20
+    end do
+
+    map_ptr => a
+
+    ! This should map a and map_ptr to the device and attach map_ptr
+    ! to a (since map_ptr was pointer-assigned to a above).
+    !$omp target enter data map(ref_ptr_ptee, to: map_ptr)
+
+    map_ptr => b
+
+    ! As "b" hasn't been mapped to device yet, the first time it's mapped will
+    ! be when map_ptr is re-mapped (implicitly or explicitly), the default behavior
+    ! when LIBOMPTARGET_TREAT_ATTACH_AUTO_AS_ALWAYS is switched off would force attachment
+    ! of map_ptr to b as we've assigned it above. To prevent this and test the never
+    ! attachment, we can apply attach(never), which prevents this reattachment from
+    ! occurring
+    !$omp target map(to: index) map(tofrom: correct) map(attach(never): map_ptr)
+        do index = 1, n
+            if (map_ptr(index) /= 10) then
+                correct = .false.
+            endif
+        end do
+    !$omp end target
+
+    if (correct .NEQV. .true.) then
+        print*, "Failed!"
+        stop 1
+    endif
+
+    print*, "Passed!"
+end program
+
+!CHECK: Passed!
diff --git a/offload/test/offloading/fortran/map_ref_ptr_ptee_test_1.f90 b/offload/test/offloading/fortran/map_ref_ptr_ptee_test_1.f90
new file mode 100644
index 0000000000000..e8ead0a251f97
--- /dev/null
+++ b/offload/test/offloading/fortran/map_ref_ptr_ptee_test_1.f90
@@ -0,0 +1,48 @@
+! This checks that we can specify ref_ptee and ref_ptr without encountering
+! an error, and that data is correctly mapped to and from the device.
+! REQUIRES: flang, amdgpu
+
+! RUN: %libomptarget-compile-fortran-generic -fopenmp-version=61
+! RUN: env LIBOMPTARGET_TREAT_ATTACH_AUTO_AS_ALWAYS=0 %libomptarget-run-generic 2>&1 | %fcheck-generic
+program main
+    implicit none
+    integer, pointer :: map_ptr(:)
+    integer, target :: b(10)
+    integer :: index
+
+    map_ptr => b
+
+    ! If our reading of the specification is correct, auto attach should
+    ! apply here, and the pointer should automatically be attached to the
+    ! ref_ptee mapping; internally we implicitly apply the attach map
+    ! type.
+    !$omp target enter data map(ref_ptee, to: map_ptr)
+    !$omp target enter data map(ref_ptr, to: map_ptr)
+
+    ! This should, in theory, hit a memory access fault if we haven't
+    ! attached correctly above; if all went well, it runs fine.
+    !$omp target map(to: index)
+        do index = 1, 10
+            map_ptr(index) = index
+        end do
+    !$omp end target
+
+    ! We don't care about the descriptor's contents, but we do want to
+    ! deallocate it (and only it) and then map the data back. The
+    ! slightly unusual ordering is deliberate: it tests that we can
+    ! delete the descriptor separately and still pull the data back
+    ! from the device.
+    !$omp target exit data map(ref_ptee, from: map_ptr)
+    !$omp target exit data map(ref_ptr, delete: map_ptr)
+
+    do index = 1, 10
+        if (map_ptr(index) /= index) then
+            print*, "Failed!"
+            stop 1
+        endif
+    end do
+
+    print*, "Passed!"
+end program
+
+! CHECK: Passed!
diff --git a/offload/test/offloading/fortran/map_ref_ptr_ptee_test_2.f90 b/offload/test/offloading/fortran/map_ref_ptr_ptee_test_2.f90
new file mode 100644
index 0000000000000..b5fbfbe8df780
--- /dev/null
+++ b/offload/test/offloading/fortran/map_ref_ptr_ptee_test_2.f90
@@ -0,0 +1,47 @@
+! This checks that we can specify ref_ptee and ref_ptr without encountering
+! an error, and that data is correctly mapped to and from the device. It maps
+! in a different order from map_ref_ptr_ptee_test_1.f90 to verify that we do
+! not hit any odd runtime errors from mapping in a different order.
+! REQUIRES: flang, amdgpu
+
+! RUN: %libomptarget-compile-fortran-generic -fopenmp-version=61
+! RUN: env LIBOMPTARGET_TREAT_ATTACH_AUTO_AS_ALWAYS=0 %libomptarget-run-generic 2>&1 | %fcheck-generic
+
+program main
+    implicit none
+    integer, pointer :: map_ptr(:)
+    integer, target :: b(10)
+    integer :: index
+
+    map_ptr => b
+
+    !$omp target enter data map(ref_ptr, to: map_ptr)
+    !$omp target enter data map(ref_ptee, to: map_ptr)
+
+    ! This should, in theory, hit a memory access fault if we haven't
+    ! attached correctly above; if all went well, it runs fine.
+    !$omp target map(to: index)
+        do index = 1, 10
+            map_ptr(index) = index
+        end do
+    !$omp end target
+
+    ! We don't care about the descriptor's contents, but we do want to
+    ! deallocate it (and only it) and then map the data back. The
+    ! slightly unusual ordering is deliberate: it tests that we can
+    ! delete the descriptor separately and still pull the data back
+    ! from the device.
+    !$omp target exit data map(ref_ptr, delete: map_ptr)
+    !$omp target exit data map(ref_ptee, from: map_ptr)
+
+    do index = 1, 10
+        if (map_ptr(index) /= index) then
+            print*, "Failed!"
+            stop 1
+        endif
+    end do
+
+    print*, "Passed!"
+end program
+
+! CHECK: Passed!
diff --git a/offload/test/offloading/fortran/target-map-pointer-to-dtype-allocatable-member.f90 b/offload/test/offloading/fortran/target-map-pointer-to-dtype-allocatable-member.f90
index 8e1e68528943f..da9a8e70967f3 100644
--- a/offload/test/offloading/fortran/target-map-pointer-to-dtype-allocatable-member.f90
+++ b/offload/test/offloading/fortran/target-map-pointer-to-dtype-allocatable-member.f90
@@ -4,7 +4,8 @@
 ! directives
 ! REQUIRES: flang, amdgpu
 
-! RUN: %libomptarget-compile-fortran-run-and-check-generic
+! RUN: %libomptarget-compile-fortran-generic
+! RUN: env LIBOMPTARGET_TREAT_ATTACH_AUTO_AS_ALWAYS=1 %libomptarget-run-generic 2>&1 | %fcheck-generic
 module dtype
     type :: my_dtype
             integer :: s, e


