[llvm-branch-commits] [flang] [llvm] [mlir] [OpenMP][MLIR] Modify OpenMP Dialect lowering to support attach mapping (PR #179023)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri May 8 21:08:47 PDT 2026
https://github.com/agozillon updated https://github.com/llvm/llvm-project/pull/179023
>From fdc966410c62909ff41376731c6493dcb55140d2 Mon Sep 17 00:00:00 2001
From: agozillon <Andrew.Gozillon at amd.com>
Date: Mon, 26 Jan 2026 11:15:19 -0600
Subject: [PATCH] [OpenMP][MLIR] Modify OpenMP Dialect lowering to
support attach mapping
This PR adjusts the LLVM-IR lowering to support the new attach map type that the runtime
uses to link data and pointer together. In most cases this replaces the older
OMP_MAP_PTR_AND_OBJ map type, and it allows slightly more complicated ref_ptr/ptee
and attach semantics.
---
.../OpenMP/map-types-and-sizes.f90 | 264 +++----
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 3 +-
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 20 +-
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 672 ++++++++++--------
.../allocatable_gpu_reduction_teams.mlir | 3 +-
.../omptarget-data-use-dev-ordering.mlir | 21 +-
.../LLVMIR/omptarget-host-ref-semantics.mlir | 273 +++++++
mlir/test/Target/LLVMIR/omptarget-llvm.mlir | 8 +-
.../omptarget-mapper-combined-entry.mlir | 10 +-
...t-nested-ptr-record-type-mapping-host.mlir | 23 +-
...arget-nested-record-type-mapping-host.mlir | 8 +-
mlir/test/Target/LLVMIR/omptarget-nowait.mlir | 26 +-
...ptarget-overlapping-record-member-map.mlir | 37 +-
.../omptarget-record-type-mapping-host.mlir | 8 +-
...rget-record-type-with-ptr-member-host.mlir | 85 +--
.../descriptor-stack-jam-regression.f90 | 3 +-
.../offloading/fortran/map_attach_always.f90 | 70 ++
.../offloading/fortran/map_attach_never.f90 | 55 ++
.../fortran/map_ref_ptr_ptee_test_1.f90 | 47 ++
.../fortran/map_ref_ptr_ptee_test_2.f90 | 47 ++
...ap-pointer-to-dtype-allocatable-member.f90 | 3 +-
21 files changed, 1083 insertions(+), 603 deletions(-)
create mode 100644 mlir/test/Target/LLVMIR/omptarget-host-ref-semantics.mlir
create mode 100644 offload/test/offloading/fortran/map_attach_always.f90
create mode 100644 offload/test/offloading/fortran/map_attach_never.f90
create mode 100644 offload/test/offloading/fortran/map_ref_ptr_ptee_test_1.f90
create mode 100644 offload/test/offloading/fortran/map_ref_ptr_ptee_test_2.f90
diff --git a/flang/test/Integration/OpenMP/map-types-and-sizes.f90 b/flang/test/Integration/OpenMP/map-types-and-sizes.f90
index 317eb4b0b69d6..93f28a7525e19 100644
--- a/flang/test/Integration/OpenMP/map-types-and-sizes.f90
+++ b/flang/test/Integration/OpenMP/map-types-and-sizes.f90
@@ -42,8 +42,8 @@ subroutine mapType_is_device_ptr
!$omp end target
end subroutine mapType_is_device_ptr
-!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [6 x i64] [i64 0, i64 0, i64 0, i64 8, i64 0, i64 0]
-!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [6 x i64] [i64 32, i64 281474976711173, i64 281474976711173, i64 281474976711171, i64 281474976711187, i64 288]
+!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [5 x i64] [i64 0, i64 24, i64 0, i64 0, i64 0]
+!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976711173, i64 515, i64 16384, i64 288]
subroutine mapType_ptr
integer, pointer :: a
!$omp target
@@ -82,8 +82,8 @@ subroutine map_ompx_hold
!$omp end target data
end subroutine
-!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [6 x i64] [i64 0, i64 0, i64 0, i64 8, i64 0, i64 0]
-!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [6 x i64] [i64 32, i64 281474976711173, i64 281474976711173, i64 281474976711171, i64 281474976711187, i64 288]
+!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [5 x i64] [i64 0, i64 24, i64 0, i64 0, i64 0]
+!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976711173, i64 515, i64 16384, i64 288]
subroutine mapType_allocatable
integer, allocatable :: a
allocate(a)
@@ -93,8 +93,8 @@ subroutine mapType_allocatable
deallocate(a)
end subroutine mapType_allocatable
-!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [6 x i64] [i64 0, i64 0, i64 0, i64 8, i64 0, i64 0]
-!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [6 x i64] [i64 32, i64 281474976710661, i64 281474976710661, i64 281474976710659, i64 281474976710675, i64 288]
+!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [5 x i64] [i64 0, i64 24, i64 0, i64 0, i64 0]
+!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976710661, i64 3, i64 16384, i64 288]
subroutine mapType_ptr_explicit
integer, pointer :: a
!$omp target map(tofrom: a)
@@ -102,8 +102,8 @@ subroutine mapType_ptr_explicit
!$omp end target
end subroutine mapType_ptr_explicit
-!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [6 x i64] [i64 0, i64 0, i64 0, i64 8, i64 0, i64 0]
-!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [6 x i64] [i64 32, i64 281474976710661, i64 281474976710661, i64 281474976710659, i64 281474976710675, i64 288]
+!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [5 x i64] [i64 0, i64 24, i64 0, i64 0, i64 0]
+!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976710661, i64 3, i64 16384, i64 288]
subroutine mapType_allocatable_explicit
integer, allocatable :: a
allocate(a)
@@ -254,8 +254,8 @@ subroutine mapType_derived_explicit_nested_member_with_bounds
!$omp end target
end subroutine mapType_derived_explicit_nested_member_with_bounds
-!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [5 x i64] [i64 0, i64 48, i64 8, i64 0, i64 0]
-!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976710661, i64 281474976710659, i64 281474976710675, i64 288]
+!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [5 x i64] [i64 0, i64 48, i64 0, i64 0, i64 0]
+!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976710661, i64 3, i64 16384, i64 288]
subroutine mapType_derived_type_alloca()
type :: one_layer
real(4) :: i
@@ -275,8 +275,8 @@ subroutine mapType_derived_type_alloca()
!$omp end target
end subroutine
-!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [10 x i64] [i64 0, i64 0, i64 0, i64 8, i64 0, i64 48, i64 8, i64 0, i64 4, i64 0]
-!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [10 x i64] [i64 32, i64 281474976710661, i64 281474976710661, i64 281474976710659, i64 281474976710675, i64 281474976710661, i64 281474976710659, i64 281474976710675, i64 281474976710659, i64 288]
+!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [9 x i64] [i64 0, i64 40, i64 0, i64 48, i64 0, i64 4, i64 0, i64 0, i64 0]
+!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [9 x i64] [i64 32, i64 281474976710661, i64 0, i64 281474976710661, i64 3, i64 281474976710659, i64 16384, i64 16384, i64 288]
subroutine mapType_alloca_derived_type()
type :: one_layer
real(4) :: i
@@ -298,8 +298,8 @@ subroutine mapType_alloca_derived_type()
!$omp end target
end subroutine
-!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [10 x i64] [i64 0, i64 0, i64 0, i64 8, i64 0, i64 48, i64 8, i64 0, i64 4, i64 0]
-!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [10 x i64] [i64 32, i64 281474976710661, i64 281474976710661, i64 281474976710659, i64 281474976710675, i64 281474976710661, i64 281474976710659, i64 281474976710675, i64 281474976710659, i64 288]
+!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [9 x i64] [i64 0, i64 40, i64 0, i64 48, i64 0, i64 4, i64 0, i64 0, i64 0]
+!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [9 x i64] [i64 32, i64 281474976710661, i64 0, i64 281474976710661, i64 3, i64 281474976710659, i64 16384, i64 16384, i64 288]
subroutine mapType_alloca_nested_derived_type()
type :: middle_layer
real(4) :: i
@@ -329,8 +329,8 @@ subroutine mapType_alloca_nested_derived_type()
!$omp end target
end subroutine
-!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [5 x i64] [i64 0, i64 48, i64 8, i64 0, i64 0]
-!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976710661, i64 281474976710659, i64 281474976710675, i64 288]
+!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [5 x i64] [i64 0, i64 48, i64 0, i64 0, i64 0]
+!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976710661, i64 3, i64 16384, i64 288]
subroutine mapType_nested_derived_type_alloca()
type :: middle_layer
real(4) :: i
@@ -358,8 +358,8 @@ subroutine mapType_nested_derived_type_alloca()
!$omp end target
end subroutine
-!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [8 x i64] [i64 0, i64 64, i64 8, i64 0, i64 48, i64 8, i64 0, i64 0]
-!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [8 x i64] [i64 32, i64 281474976710661, i64 281474976710656, i64 281474976710672, i64 281474976710661, i64 281474976710659, i64 281474976710675, i64 288]
+!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [8 x i64] [i64 0, i64 64, i64 0, i64 48, i64 0, i64 0, i64 0, i64 0]
+!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [8 x i64] [i64 32, i64 281474976710661, i64 0, i64 281474976710661, i64 3, i64 16384, i64 16384, i64 288]
subroutine mapType_nested_derived_type_member_idx()
type :: vertexes
integer :: test
@@ -432,21 +432,23 @@ end subroutine mapType_common_block_members
!CHECK-LABEL: define {{.*}} @{{.*}}maptype_ptr_explicit_{{.*}}
!CHECK: %[[ALLOCA:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, i64 1, align 8
-!CHECK: %[[ALLOCA_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[ALLOCA]], i32 1
-!CHECK: %[[ALLOCA_GEP_INT:.*]] = ptrtoaddr ptr %[[ALLOCA_GEP]] to i64
-!CHECK: %[[ALLOCA_INT:.*]] = ptrtoaddr ptr %[[ALLOCA]] to i64
-!CHECK: %[[SIZE_DIFF:.*]] = sub i64 %[[ALLOCA_GEP_INT]], %[[ALLOCA_INT]]
-!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [6 x i64], ptr %.offload_sizes, i32 0, i32 0
-!CHECK: store i64 %[[SIZE_DIFF]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
+!CHECK: %[[ALLOCA_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[ALLOCA]], i32 0, i32 0
+!CHECK: %[[LOAD_PTR:.*]] = load ptr, ptr %[[ALLOCA_GEP]], align 8
+!CHECK: %[[LOAD_PTR2:.*]] = load ptr, ptr %[[ALLOCA_GEP]], align 8
+!CHECK: %[[NULL_CMP:.*]] = icmp eq ptr %[[LOAD_PTR2]], null
+!CHECK: %[[SELECT:.*]] = select i1 %[[NULL_CMP]], i64 0, i64 4
+!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 2
+!CHECK: store i64 %[[SELECT]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
!CHECK-LABEL: define {{.*}} @{{.*}}maptype_allocatable_explicit_{{.*}}
!CHECK: %[[ALLOCA:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, i64 1, align 8
-!CHECK: %[[ALLOCA_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[ALLOCA]], i32 1
-!CHECK: %[[ALLOCA_GEP_INT:.*]] = ptrtoaddr ptr %[[ALLOCA_GEP]] to i64
-!CHECK: %[[ALLOCA_INT:.*]] = ptrtoaddr ptr %[[ALLOCA]] to i64
-!CHECK: %[[SIZE_DIFF:.*]] = sub i64 %[[ALLOCA_GEP_INT]], %[[ALLOCA_INT]]
-!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [6 x i64], ptr %.offload_sizes, i32 0, i32 0
-!CHECK: store i64 %[[SIZE_DIFF]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
+!CHECK: %[[ALLOCA_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[ALLOCA]], i32 0, i32 0
+!CHECK: %[[LOAD_PTR:.*]] = load ptr, ptr %[[ALLOCA_GEP]], align 8
+!CHECK: %[[LOAD_PTR2:.*]] = load ptr, ptr %[[ALLOCA_GEP]], align 8
+!CHECK: %[[NULL_CMP:.*]] = icmp eq ptr %[[LOAD_PTR2]], null
+!CHECK: %[[SELECT:.*]] = select i1 %[[NULL_CMP]], i64 0, i64 4
+!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 2
+!CHECK: store i64 %[[SELECT]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
!CHECK-LABEL: define {{.*}} @{{.*}}maptype_derived_implicit_{{.*}}
!CHECK: %[[ALLOCA:.*]] = alloca %_QFmaptype_derived_implicitTscalar_and_array, i64 1, align 8
@@ -560,12 +562,13 @@ end subroutine mapType_common_block_members
!CHECK: %[[DESC_BASE_ADDR_DATA_SIZE:.*]] = mul i64 %[[MEMBER_BASE_ADDR_SIZE]], 4
!CHECK: %[[LOAD_ADDR_DATA:.*]] = load ptr, ptr %[[MEMBER_DESCRIPTOR_BASE_ADDR]], align 8
!CHECK: %[[GEP_ADDR_DATA:.*]] = getelementptr inbounds i32, ptr %[[LOAD_ADDR_DATA]], i64 0
+!CHECK: %[[LOAD_ADDR_DATA2:.*]] = load ptr, ptr %[[MEMBER_DESCRIPTOR_BASE_ADDR]], align 8
+!CHECK: %[[GEP_ADDR_DATA2:.*]] = getelementptr inbounds i32, ptr %[[LOAD_ADDR_DATA2]], i64 0
!CHECK: %[[MEMBER_ACCESS_ADDR_END:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[MEMBER_ACCESS]], i64 1
!CHECK: %[[MEMBER_ACCESS_ADDR_INT:.*]] = ptrtoaddr ptr %[[MEMBER_ACCESS_ADDR_END]] to i64
!CHECK: %[[MEMBER_ACCESS_ADDR_BEGIN:.*]] = ptrtoaddr ptr %[[MEMBER_ACCESS]] to i64
!CHECK: %[[DTYPE_SIZE_CALC:.*]] = sub i64 %[[MEMBER_ACCESS_ADDR_INT]], %[[MEMBER_ACCESS_ADDR_BEGIN]]
!CHECK: %[[DTYPE_CMP:.*]] = icmp eq ptr %[[GEP_ADDR_DATA]], null
-!CHECK: %[[DTYPE_SEL:.*]] = select i1 %[[DTYPE_CMP]], i64 0, i64 %[[DESC_BASE_ADDR_DATA_SIZE]]
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
!CHECK: store ptr %[[ALLOCA]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 0
@@ -579,14 +582,11 @@ end subroutine mapType_common_block_members
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
!CHECK: store ptr %[[ALLOCA]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 2
-!CHECK: store ptr %[[MEMBER_DESCRIPTOR_BASE_ADDR]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: store ptr %[[GEP_ADDR_DATA2]], ptr %[[OFFLOAD_PTR_ARR]], align 8
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
-!CHECK: store ptr %[[MEMBER_DESCRIPTOR_BASE_ADDR]], ptr %[[BASE_PTR_ARR]], align 8
+!CHECK: store ptr %[[MEMBER_ACCESS]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 3
!CHECK: store ptr %array_offset, ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 3
-!CHECK: store i64 %[[DTYPE_SEL]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
-
!CHECK-LABEL: define {{.*}} @{{.*}}maptype_alloca_derived_type_{{.*}}
!CHECK: %{{.*}} = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, align 8
@@ -607,7 +607,6 @@ end subroutine mapType_common_block_members
!CHECK: %[[DTYPE_BASE_ADDR_LOAD:.*]] = load ptr, ptr %[[DTYPE_BASE_ADDR_ACCESS]], align 8
!CHECK: %[[DTYPE_ALLOCA_MEMBER_ACCESS:.*]] = getelementptr %_QFmaptype_alloca_derived_typeTone_layer, ptr %[[DTYPE_BASE_ADDR_LOAD]], i32 0, i32 4
!CHECK: %[[DTYPE_ALLOCA_MEMBER_BASE_ADDR_ACCESS:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[DTYPE_ALLOCA_MEMBER_ACCESS]], i32 0, i32 0
-
!CHECK: %[[DTYPE_BASE_ADDR_ACCESS_2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[DTYPE_DESC_ALLOCA]], i32 0, i32 0
!CHECK: %[[DTYPE_BASE_ADDR_LOAD_2:.*]] = load ptr, ptr %[[DTYPE_BASE_ADDR_ACCESS_2]], align 8
!CHECK: %[[DTYPE_NONALLOCA_MEMBER_ACCESS:.*]] = getelementptr %_QFmaptype_alloca_derived_typeTone_layer, ptr %[[DTYPE_BASE_ADDR_LOAD_2]], i32 0, i32 5
@@ -617,62 +616,29 @@ end subroutine mapType_common_block_members
!CHECK: %[[MEMBER_SIZE_CALC_3:.*]] = mul i64 1, %[[MEMBER_SIZE_CALC_2]]
!CHECK: %[[MEMBER_SIZE_CALC_4:.*]] = mul i64 %[[MEMBER_SIZE_CALC_3]], 4
!CHECK: %[[DTYPE_BASE_ADDR_LOAD_3:.*]] = load ptr, ptr %[[DTYPE_BASE_ADDR_ACCESS_3]], align 8
+!CHECK: %[[DTYPE_BASE_ADDR_LOAD_3_1:.*]] = load ptr, ptr %[[DTYPE_BASE_ADDR_ACCESS_3]], align 8
!CHECK: %[[LOAD_DTYPE_DESC_MEMBER:.*]] = load ptr, ptr %[[DTYPE_ALLOCA_MEMBER_BASE_ADDR_ACCESS]], align 8
!CHECK: %[[MEMBER_ARRAY_OFFSET:.*]] = getelementptr inbounds i32, ptr %[[LOAD_DTYPE_DESC_MEMBER]], i64 0
-!CHECK: %[[DTYPE_END_OFFSET:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[DTYPE_DESC_ALLOCA_3]], i32 1
-!CHECK: %[[DTYPE_END:.*]] = ptrtoaddr ptr %[[DTYPE_END_OFFSET]] to i64
-!CHECK: %[[DTYPE_BEGIN:.*]] = ptrtoaddr ptr %[[DTYPE_DESC_ALLOCA_3]] to i64
-!CHECK: %[[DTYPE_DESC_SZ:.*]] = sub i64 %[[DTYPE_END]], %[[DTYPE_BEGIN]]
-!CHECK: %[[DTYPE_BASE_ADDR_ACCESS_4:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[DTYPE_DESC_ALLOCA_3]], i32 1
-!CHECK: %[[DTYPE_BASE_ADDR_ACCESS_3_OFF:.*]] = getelementptr ptr, ptr %[[DTYPE_BASE_ADDR_ACCESS_3]], i32 1
-!CHECK: %[[SIZE_2_CALC_1:.*]] = ptrtoaddr ptr %[[DTYPE_BASE_ADDR_ACCESS_4]] to i64
-!CHECK: %[[SIZE_2_CALC_2:.*]] = ptrtoaddr ptr %[[DTYPE_BASE_ADDR_ACCESS_3_OFF]] to i64
-!CHECK: %[[SIZE_2_CALC_3:.*]] = sub i64 %[[SIZE_2_CALC_1]], %[[SIZE_2_CALC_2]]
-!CHECK: %[[CMP_NULL:.*]] = icmp eq ptr %[[MEMBER_ARRAY_OFFSET]], null
-!CHECK: %[[NULL_SEL:.*]] = select i1 %[[CMP_NULL]], i64 0, i64 %[[MEMBER_SIZE_CALC_4]]
-
-!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
-!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_ptrs, i32 0, i32 0
-!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [10 x i64], ptr %.offload_sizes, i32 0, i32 0
-!CHECK: store i64 %[[DTYPE_DESC_SZ]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
-!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
-!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_ptrs, i32 0, i32 1
-!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
-!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_ptrs, i32 0, i32 2
-!CHECK: store ptr %[[DTYPE_BASE_ADDR_ACCESS_3_OFF]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [10 x i64], ptr %.offload_sizes, i32 0, i32 2
-!CHECK: store i64 %[[SIZE_2_CALC_3]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
-!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
-!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_ptrs, i32 0, i32 3
-!CHECK: store ptr %[[DTYPE_BASE_ADDR_ACCESS_3]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 4
-!CHECK: store ptr %[[DTYPE_BASE_ADDR_ACCESS_3]], ptr %[[BASE_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_ptrs, i32 0, i32 4
-!CHECK: store ptr %[[DTYPE_BASE_ADDR_LOAD_3]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 5
-!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_ptrs, i32 0, i32 5
-!CHECK: store ptr %[[DTYPE_ALLOCA_MEMBER_ACCESS]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 6
-!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_ptrs, i32 0, i32 6
-!CHECK: store ptr %[[DTYPE_ALLOCA_MEMBER_BASE_ADDR_ACCESS]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 7
-!CHECK: store ptr %[[DTYPE_ALLOCA_MEMBER_BASE_ADDR_ACCESS]], ptr %[[BASE_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_ptrs, i32 0, i32 7
-!CHECK: store ptr %[[MEMBER_ARRAY_OFFSET]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [10 x i64], ptr %.offload_sizes, i32 0, i32 7
-!CHECK: store i64 %[[NULL_SEL]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
-!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 8
-!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_ptrs, i32 0, i32 8
-!CHECK: store ptr %[[DTYPE_NONALLOCA_MEMBER_ACCESS]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: %[[SIZE_CALC_1:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[DTYPE_DESC_ALLOCA_3]], i32 1
+!CHECK: %[[SIZE_CALC_2:.*]] = ptrtoaddr ptr %[[SIZE_CALC_1]] to i64
+!CHECK: %[[SIZE_CALC_3:.*]] = ptrtoaddr ptr %[[DTYPE_DESC_ALLOCA_3]] to i64
+!CHECK: %[[SIZE_CALC_4:.*]] = sub i64 %[[SIZE_CALC_2]], %[[SIZE_CALC_3]]
+!CHECK: %[[NULL_CMP:.*]] = icmp eq ptr %[[DTYPE_BASE_ADDR_LOAD_3_1]], null
+!CHECK: %[[SEL_SZ:.*]] = select i1 %[[NULL_CMP]], i64 0, i64 136
+!CHECK: %[[NULL_CMP2:.*]] = icmp eq ptr %{{.*}}, null
+!CHECK: %[[SEL_SZ2:.*]] = select i1 %[[NULL_CMP2]], i64 0, i64 %[[MEMBER_SIZE_CALC_4]]
+!CHECK: getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+!CHECK: getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+!CHECK: getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+!CHECK: getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+!CHECK: getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
+!CHECK: getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 2
+!CHECK: getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
+!CHECK: getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 3
+!CHECK: getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 4
+!CHECK: getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 4
+!CHECK: getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 5
+!CHECK: getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 5
!CHECK-LABEL: define {{.*}} @{{.*}}maptype_alloca_nested_derived_type{{.*}}
!CHECK: %{{.*}} = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, align 8
@@ -698,61 +664,28 @@ end subroutine mapType_common_block_members
!CHECK: %[[ALLOCATABLE_MEMBER_SIZE_CALC_4:.*]] = mul i64 1, %[[ALLOCATABLE_MEMBER_SIZE_CALC_3]]
!CHECK: %[[ALLOCATABLE_MEMBER_SIZE_CALC_5:.*]] = mul i64 %[[ALLOCATABLE_MEMBER_SIZE_CALC_4]], 4
!CHECK: %[[LOAD_BASE_ADDR:.*]] = load ptr, ptr %[[DTYPE_DESC_BASE_ADDR]], align 8
+!CHECK: %[[LOAD_BASE_ADDR2:.*]] = load ptr, ptr %[[DTYPE_DESC_BASE_ADDR]], align 8
!CHECK: %[[LOAD_DESC_MEMBER_BASE_ADDR:.*]] = load ptr, ptr %[[MAPPED_MEMBER_BASE_ADDR_ACCESS]], align 8
!CHECK: %[[ARRAY_OFFSET:.*]] = getelementptr inbounds i32, ptr %[[LOAD_DESC_MEMBER_BASE_ADDR]], i64 0
-!CHECK: %[[DTYPE_DESC_SIZE_CALC_1:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[DTYPE_DESC_ALLOCA_3]], i32 1
-!CHECK: %[[DTYPE_DESC_SIZE_CALC_2:.*]] = ptrtoaddr ptr %[[DTYPE_DESC_SIZE_CALC_1]] to i64
-!CHECK: %[[DTYPE_DESC_SIZE_CALC_3:.*]] = ptrtoaddr ptr %[[DTYPE_DESC_ALLOCA_3]] to i64
-!CHECK: %[[DTYPE_DESC_SIZE_CALC_4:.*]] = sub i64 %[[DTYPE_DESC_SIZE_CALC_2]], %[[DTYPE_DESC_SIZE_CALC_3]]
-!CHECK: %[[DTYPE_BASE_ADDR_ACCESS_3:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[DTYPE_DESC_ALLOCA_3]], i32 1
-!CHECK: %[[DTYPE_BASE_ADDR_ACCESS_3_OFF:.*]] = getelementptr ptr, ptr %[[DTYPE_DESC_BASE_ADDR]], i32 1
-!CHECK: %[[SIZE_2_CALC_1:.*]] = ptrtoaddr ptr %[[DTYPE_BASE_ADDR_ACCESS_3]] to i64
-!CHECK: %[[SIZE_2_CALC_2:.*]] = ptrtoaddr ptr %[[DTYPE_BASE_ADDR_ACCESS_3_OFF]] to i64
-!CHECK: %[[SIZE_2_CALC_3:.*]] = sub i64 %[[SIZE_2_CALC_1]], %[[SIZE_2_CALC_2]]
-!CHECK: %[[NULL_CMP:.*]] = icmp eq ptr %[[ARRAY_OFFSET]], null
-!CHECK: %[[NULL_SEL:.*]] = select i1 %[[NULL_CMP]], i64 0, i64 %[[ALLOCATABLE_MEMBER_SIZE_CALC_5]]
-!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
-!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_ptrs, i32 0, i32 0
-!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [10 x i64], ptr %.offload_sizes, i32 0, i32 0
-!CHECK: store i64 %[[DTYPE_DESC_SIZE_CALC_4]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
-!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+!CHECK: %[[NULL_CMP:.*]] = icmp eq ptr %[[LOAD_BASE_ADDR2]], null
+!CHECK: %[[SEL_SZ:.*]] = select i1 %[[NULL_CMP]], i64 0, i64 240
+!CHECK: %[[NULL_CMP2:.*]] = icmp eq ptr %[[ARRAY_OFFSET]], null
+!CHECK: %[[SEL_SZ2:.*]] = select i1 %[[NULL_CMP2]], i64 0, i64 %[[ALLOCATABLE_MEMBER_SIZE_CALC_5]]
+!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 0
!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
-!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_ptrs, i32 0, i32 2
-!CHECK: store ptr %[[DTYPE_BASE_ADDR_ACCESS_3_OFF]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [10 x i64], ptr %.offload_sizes, i32 0, i32 2
-!CHECK: store i64 %[[SIZE_2_CALC_3]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
-!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
-!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_ptrs, i32 0, i32 3
-!CHECK: store ptr %[[DTYPE_DESC_BASE_ADDR]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 4
-!CHECK: store ptr %[[DTYPE_DESC_BASE_ADDR]], ptr %[[BASE_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_ptrs, i32 0, i32 4
-!CHECK: store ptr %[[LOAD_BASE_ADDR]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 5
-!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_ptrs, i32 0, i32 5
-!CHECK: store ptr %[[MAPPED_MEMBER_ACCESS]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 6
-!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_ptrs, i32 0, i32 6
-!CHECK: store ptr %[[MAPPED_MEMBER_BASE_ADDR_ACCESS]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 7
-!CHECK: store ptr %[[MAPPED_MEMBER_BASE_ADDR_ACCESS]], ptr %[[BASE_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_ptrs, i32 0, i32 7
-!CHECK: store ptr %[[ARRAY_OFFSET]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [10 x i64], ptr %.offload_sizes, i32 0, i32 7
-!CHECK: store i64 %[[NULL_SEL]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
-!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 8
+!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_ptrs, i32 0, i32 8
-!CHECK: store ptr %[[NESTED_NONALLOCA_MEMBER_ACCESS]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+!CHECK: getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
+!CHECK: getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 2
+!CHECK: getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
+!CHECK: getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 3
+!CHECK: getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 4
+!CHECK: getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 4
+!CHECK: getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 5
+!CHECK: getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 5
!CHECK-LABEL: define {{.*}} @{{.*}}maptype_nested_derived_type_alloca{{.*}}
!CHECK: %[[ALLOCATABLE_MEMBER_ALLOCA:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8
@@ -769,11 +702,13 @@ end subroutine mapType_common_block_members
!CHECK: %[[ALLOCATABLE_MEMBER_SIZE_CALC_5:.*]] = mul i64 %[[ALLOCATABLE_MEMBER_SIZE_CALC_4]], 4
!CHECK: %[[LOAD_BASE_ADDR:.*]] = load ptr, ptr %[[NESTED_MEMBER_BASE_ADDR_ACCESS]], align 8
!CHECK: %[[ARR_OFFS:.*]] = getelementptr inbounds i32, ptr %[[LOAD_BASE_ADDR]], i64 0
+!CHECK: %[[LOAD_BASE_ADDR:.*]] = load ptr, ptr %[[NESTED_MEMBER_BASE_ADDR_ACCESS]], align 8
+!CHECK: %[[ARR_OFFS2:.*]] = getelementptr inbounds i32, ptr %[[LOAD_BASE_ADDR]], i64 0
!CHECK: %[[NESTED_MEMBER_BASE_ADDR_ACCESS_2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[NESTED_MEMBER_ACCESS]], i64 1
!CHECK: %[[DTYPE_SEGMENT_SIZE_CALC_1:.*]] = ptrtoaddr ptr %[[NESTED_MEMBER_BASE_ADDR_ACCESS_2]] to i64
!CHECK: %[[DTYPE_SEGMENT_SIZE_CALC_2:.*]] = ptrtoaddr ptr %[[NESTED_MEMBER_ACCESS]] to i64
!CHECK: %[[DTYPE_SEGMENT_SIZE_CALC_3:.*]] = sub i64 %[[DTYPE_SEGMENT_SIZE_CALC_1]], %[[DTYPE_SEGMENT_SIZE_CALC_2]]
-!CHECK: %[[DATA_CMP:.*]] = icmp eq ptr %[[ARR_OFFS]], null
+!CHECK: %[[DATA_CMP:.*]] = icmp eq ptr %[[ARR_OFFS2]], null
!CHECK: %[[DATA_SEL:.*]] = select i1 %[[DATA_CMP]], i64 0, i64 %[[ALLOCATABLE_MEMBER_SIZE_CALC_5]]
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
!CHECK: store ptr %[[ALLOCA]], ptr %[[BASE_PTR_ARR]], align 8
@@ -788,13 +723,13 @@ end subroutine mapType_common_block_members
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
!CHECK: store ptr %[[ALLOCA]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 2
-!CHECK: store ptr %[[NESTED_MEMBER_BASE_ADDR_ACCESS]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: store ptr %[[ARR_OFFS2]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 2
+!CHECK: store i64 %[[DATA_SEL]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
-!CHECK: store ptr %[[NESTED_MEMBER_BASE_ADDR_ACCESS]], ptr %[[BASE_PTR_ARR]], align 8
+!CHECK: store ptr %[[NESTED_MEMBER_ACCESS]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 3
!CHECK: store ptr %[[ARR_OFFS]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 3
-!CHECK: store i64 %[[DATA_SEL]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
!CHECK-LABEL: define {{.*}} @{{.*}}maptype_nested_derived_type_member_idx{{.*}}
!CHECK: %[[ALLOCA_0:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, align 8
@@ -835,14 +770,20 @@ end subroutine mapType_common_block_members
!CHECK: %[[ARR_OFFS:.*]] = getelementptr inbounds %_QFmaptype_nested_derived_type_member_idxTvertexes, ptr %[[LOAD_OFF_PTR]], i64 0
!CHECK: %[[LOAD_ARR_OFFS:.*]] = load ptr, ptr %[[OFF_PTR_4]], align 8
!CHECK: %[[ARR_OFFS_1:.*]] = getelementptr inbounds i32, ptr %[[LOAD_ARR_OFFS]], i64 0
+!CHECK: %[[LOAD_OFF_PTR:.*]] = load ptr, ptr %[[OFF_PTR_2]], align 8
+!CHECK: %[[ARR_OFFS_2:.*]] = getelementptr inbounds %_QFmaptype_nested_derived_type_member_idxTvertexes, ptr %[[LOAD_OFF_PTR]], i64 0
+!CHECK: %[[LOAD_ARR_OFFS:.*]] = load ptr, ptr %[[OFF_PTR_4]], align 8
+!CHECK: %[[ARR_OFFS_3:.*]] = getelementptr inbounds i32, ptr %[[LOAD_ARR_OFFS]], i64 0
!CHECK: %[[SZ_CALC_1:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, ptr %[[OFF_PTR_1]], i64 1
!CHECK: %[[SZ_CALC_2:.*]] = ptrtoaddr ptr %[[SZ_CALC_1]] to i64
!CHECK: %[[SZ_CALC_3:.*]] = ptrtoaddr ptr %[[OFF_PTR_1]] to i64
!CHECK: %[[SZ_CALC_4:.*]] = sub i64 %[[SZ_CALC_2]], %[[SZ_CALC_3]]
-!CHECK: %[[SIZE_CMP:.*]] = icmp eq ptr %[[ARR_OFFS]], null
+!CHECK: %[[SIZE_CMP:.*]] = icmp eq ptr %[[ARR_OFFS_2]], null
!CHECK: %[[SIZE_SEL:.*]] = select i1 %[[SIZE_CMP]], i64 0, i64 %[[OFF_PTR_3]]
-!CHECK: %[[SIZE_CMP2:.*]] = icmp eq ptr %[[ARR_OFFS_1]], null
+!CHECK: %[[SIZE_CMP2:.*]] = icmp eq ptr %[[ARR_OFFS_3]], null
!CHECK: %[[SIZE_SEL2:.*]] = select i1 %[[SIZE_CMP2]], i64 0, i64 %[[SZ_CALC_4_2]]
+!CHECK: %[[SIZE_CMP3:.*]] = icmp eq ptr %[[ARR_OFFS]], null
+!CHECK: %[[SIZE_SEL3:.*]] = select i1 %[[SIZE_CMP3]], i64 0, i64 64
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
!CHECK: store ptr %[[BASE_PTR_1]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 0
@@ -856,32 +797,27 @@ end subroutine mapType_common_block_members
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
!CHECK: store ptr %[[BASE_PTR_1]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 2
-!CHECK: store ptr %[[OFF_PTR_2]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: store ptr %[[ARR_OFFS_2]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [8 x i64], ptr %.offload_sizes, i32 0, i32 2
+!CHECK: store i64 %[[SIZE_SEL]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
-!CHECK: store ptr %[[OFF_PTR_2]], ptr %[[BASE_PTR_ARR]], align 8
+!CHECK: store ptr %[[BASE_PTR_1]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 3
-!CHECK: store ptr %[[ARR_OFFS]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [8 x i64], ptr %.offload_sizes, i32 0, i32 3
-!CHECK: store i64 %[[SIZE_SEL]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
+!CHECK: store ptr %[[SZ_CALC_8]], ptr %[[OFFLOAD_PTR_ARR]], align 8
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 4
!CHECK: store ptr %[[BASE_PTR_1]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 4
-!CHECK: store ptr %[[SZ_CALC_8]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: store ptr %[[ARR_OFFS_3]], ptr %[[OFFLOAD_PTR_ARR]], align 8
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 5
-!CHECK: store ptr %[[BASE_PTR_1]], ptr %[[BASE_PTR_ARR]], align 8
+!CHECK: store ptr %[[OFF_PTR_1]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 5
-!CHECK: store ptr %[[OFF_PTR_4]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: store ptr %[[ARR_OFFS]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [8 x i64], ptr %.offload_sizes, i32 0, i32 5
+!CHECK: store i64 %[[SIZE_SEL3]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 6
-!CHECK: store ptr %[[OFF_PTR_4]], ptr %[[BASE_PTR_ARR]], align 8
+!CHECK: store ptr %[[SZ_CALC_8]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_ptrs, i32 0, i32 6
!CHECK: store ptr %[[ARR_OFFS_1]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [8 x i64], ptr %.offload_sizes, i32 0, i32 6
-!CHECK: store i64 %[[SIZE_SEL2]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
-!CHECK-LABEL: define {{.*}} @{{.*}}maptype_common_block_{{.*}}
-!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
-!CHECK: store ptr @var_common_, ptr %[[BASE_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i32 0, i32 0
-!CHECK: store ptr @var_common_, ptr %[[OFFLOAD_PTR_ARR]], align 8
!CHECK-LABEL: define {{.*}} @{{.*}}maptype_common_block_members_{{.*}}
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 5b54df30cbf19..396b95b701d08 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -3583,7 +3583,8 @@ class OpenMPIRBuilder {
InsertPointTy CodeGenIP, llvm::Value *PtrPHI, llvm::Value *BeginArg)>
PrivAndGenMapInfoCB,
llvm::Type *ElemTy, StringRef FuncName,
- CustomMapperCallbackTy CustomMapperCB);
+ CustomMapperCallbackTy CustomMapperCB,
+ bool PreserveMemberOfFlags = false);
/// Generator for '#omp target data'
///
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 19bfff7a7a4e0..654e6b024a952 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -10170,7 +10170,8 @@ Expected<Function *> OpenMPIRBuilder::emitUserDefinedMapper(
function_ref<MapInfosOrErrorTy(InsertPointTy CodeGenIP, llvm::Value *PtrPHI,
llvm::Value *BeginArg)>
GenMapInfoCB,
- Type *ElemTy, StringRef FuncName, CustomMapperCallbackTy CustomMapperCB) {
+ Type *ElemTy, StringRef FuncName, CustomMapperCallbackTy CustomMapperCB,
+ bool PreserveMemberOfFlags) {
SmallVector<Type *> Params;
Params.emplace_back(Builder.getPtrTy());
Params.emplace_back(Builder.getPtrTy());
@@ -10267,8 +10268,21 @@ Expected<Function *> OpenMPIRBuilder::emitUserDefinedMapper(
Value *OriMapType = Builder.getInt64(
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
Info->Types[I]));
- Value *MemberMapType =
- Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
+ Value *MemberMapType;
+ if (PreserveMemberOfFlags) {
+ constexpr uint64_t MemberOfMask =
+ static_cast<uint64_t>(OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
+ uint64_t OrigFlags =
+ static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
+ Info->Types[I]);
+ bool HasMemberOf = (OrigFlags & MemberOfMask) != 0;
+ if (HasMemberOf)
+ MemberMapType = Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
+ else
+ MemberMapType = OriMapType;
+ } else {
+ MemberMapType = Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
+ }
// Combine the map type inherited from user-defined mapper with that
// specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index bda84baaa9963..5d8326af810c4 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -5235,9 +5235,6 @@ static llvm::Value *getSizeInBytes(DataLayout &dl, const mlir::Type &type,
// Certain flags are discarded here such as RefPtee and co.
static llvm::omp::OpenMPOffloadMappingFlags
convertClauseMapFlags(omp::ClauseMapFlags mlirFlags) {
- auto mapTypeToBool = [&mlirFlags](omp::ClauseMapFlags flag) {
- return (mlirFlags & flag) == flag;
- };
const bool hasExplicitMap =
(mlirFlags & ~omp::ClauseMapFlags::is_device_ptr) !=
omp::ClauseMapFlags::none;
@@ -5245,43 +5242,43 @@ convertClauseMapFlags(omp::ClauseMapFlags mlirFlags) {
llvm::omp::OpenMPOffloadMappingFlags mapType =
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE;
- if (mapTypeToBool(omp::ClauseMapFlags::to))
+ if (bitEnumContainsAll(mlirFlags, omp::ClauseMapFlags::to))
mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
- if (mapTypeToBool(omp::ClauseMapFlags::from))
+ if (bitEnumContainsAll(mlirFlags, omp::ClauseMapFlags::from))
mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
- if (mapTypeToBool(omp::ClauseMapFlags::always))
+ if (bitEnumContainsAll(mlirFlags, omp::ClauseMapFlags::always))
mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
- if (mapTypeToBool(omp::ClauseMapFlags::del))
+ if (bitEnumContainsAll(mlirFlags, omp::ClauseMapFlags::del))
mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
- if (mapTypeToBool(omp::ClauseMapFlags::return_param))
+ if (bitEnumContainsAll(mlirFlags, omp::ClauseMapFlags::return_param))
mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
- if (mapTypeToBool(omp::ClauseMapFlags::priv))
+ if (bitEnumContainsAll(mlirFlags, omp::ClauseMapFlags::priv))
mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE;
- if (mapTypeToBool(omp::ClauseMapFlags::literal))
+ if (bitEnumContainsAll(mlirFlags, omp::ClauseMapFlags::literal))
mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
- if (mapTypeToBool(omp::ClauseMapFlags::implicit))
+ if (bitEnumContainsAll(mlirFlags, omp::ClauseMapFlags::implicit))
mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
- if (mapTypeToBool(omp::ClauseMapFlags::close))
+ if (bitEnumContainsAll(mlirFlags, omp::ClauseMapFlags::close))
mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
- if (mapTypeToBool(omp::ClauseMapFlags::present))
+ if (bitEnumContainsAll(mlirFlags, omp::ClauseMapFlags::present))
mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
- if (mapTypeToBool(omp::ClauseMapFlags::ompx_hold))
+ if (bitEnumContainsAll(mlirFlags, omp::ClauseMapFlags::ompx_hold))
mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
- if (mapTypeToBool(omp::ClauseMapFlags::attach))
+ if (bitEnumContainsAll(mlirFlags, omp::ClauseMapFlags::attach))
mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ATTACH;
- if (mapTypeToBool(omp::ClauseMapFlags::is_device_ptr)) {
+ if (bitEnumContainsAll(mlirFlags, omp::ClauseMapFlags::is_device_ptr)) {
mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
if (!hasExplicitMap)
mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
@@ -5296,6 +5293,15 @@ static void collectMapDataFromMapOperands(
llvm::IRBuilderBase &builder, ArrayRef<Value> useDevPtrOperands = {},
ArrayRef<Value> useDevAddrOperands = {},
ArrayRef<Value> hasDevAddrOperands = {}) {
+
+ auto checkRefPtrOrPteeMapWithAttach = [](omp::ClauseMapFlags mapType) {
+ bool hasRefType =
+ bitEnumContainsAll(mapType, omp::ClauseMapFlags::ref_ptr) ||
+ bitEnumContainsAll(mapType, omp::ClauseMapFlags::ref_ptee);
+ return hasRefType &&
+ bitEnumContainsAll(mapType, omp::ClauseMapFlags::attach);
+ };
+
auto checkIsAMember = [](const auto &mapVars, auto mapOp) {
// Check if this is a member mapping and correctly assign that it is, if
// it is a member of a larger object.
@@ -5316,10 +5322,16 @@ static void collectMapDataFromMapOperands(
// Process MapOperands
for (Value mapValue : mapVars) {
auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
- Value offloadPtr =
- mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
+ bool isRefPtrOrPteeMapWithAttach =
+ checkRefPtrOrPteeMapWithAttach(mapOp.getMapType());
+ Value offloadPtr = (mapOp.getVarPtrPtr() && !isRefPtrOrPteeMapWithAttach)
+ ? mapOp.getVarPtrPtr()
+ : mapOp.getVarPtr();
mapData.OriginalValue.push_back(moduleTranslation.lookupValue(offloadPtr));
- mapData.Pointers.push_back(mapData.OriginalValue.back());
+ mapData.Pointers.push_back(
+ isRefPtrOrPteeMapWithAttach
+ ? moduleTranslation.lookupValue(mapOp.getVarPtrPtr())
+ : mapData.OriginalValue.back());
if (llvm::Value *refPtr =
getRefPtrIfDeclareTarget(offloadPtr, moduleTranslation)) {
@@ -5336,12 +5348,22 @@ static void collectMapDataFromMapOperands(
// In every situation we currently have if we have a varPtrPtr present
// we wish to utilise it's type for the base type, main cases are
// currently Fortran descriptor base address maps and attach maps.
- mlir::Type baseTy = mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtrType().value()
- : mapOp.getVarPtrType();
- mapData.BaseType.push_back(moduleTranslation.convertType(baseTy));
- mapData.Sizes.push_back(
- getSizeInBytes(dl, baseTy, mapOp, mapData.Pointers.back(),
- mapData.BaseType.back(), builder, moduleTranslation));
+ mapData.BaseType.push_back(moduleTranslation.convertType(
+ mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtrType().value()
+ : mapOp.getVarPtrType()));
+
+ // For the attach map cases, it's a little odd, as we effectively have to
+ // utilise the base address (including all bounds offsets) for the pointer
+ // field, the pointer address for the base address field, and the size of
+ // the pointer rather than the size of the data (base address). So we end
+ // up with a mix of base types and sizes we wish to insert here.
+ mlir::Type sizeType = (isRefPtrOrPteeMapWithAttach || !mapOp.getVarPtrPtr())
+ ? mapOp.getVarPtrType()
+ : mapOp.getVarPtrPtrType().value();
+ mapData.Sizes.push_back(getSizeInBytes(
+ dl, sizeType, isRefPtrOrPteeMapWithAttach ? nullptr : mapOp,
+ mapData.Pointers.back(), moduleTranslation.convertType(sizeType),
+ builder, moduleTranslation));
mapData.MapClause.push_back(mapOp.getOperation());
mapData.Types.push_back(convertClauseMapFlags(mapOp.getMapType()));
mapData.Names.push_back(LLVM::createMappingInformation(
@@ -5358,11 +5380,27 @@ static void collectMapDataFromMapOperands(
}
auto findMapInfo = [&mapData](llvm::Value *val,
- llvm::OpenMPIRBuilder::DeviceInfoTy devInfoTy) {
+ llvm::OpenMPIRBuilder::DeviceInfoTy devInfoTy,
+ size_t memberCount) {
unsigned index = 0;
bool found = false;
for (llvm::Value *basePtr : mapData.OriginalValue) {
- if (basePtr == val && mapData.IsAMapping[index]) {
+ auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[index]);
+ // TODO: Currently we define an equivalent mapping as
+ // the same base pointer and an equivalent member count, but
+ // that is a loose definition. We may have to extend to check
+ // for other fields (varPtrPtr/individual members being mapped).
+ // Note: Attach maps are not the same as a normal data transfer;
+ // they instruct the runtime to perform an attach, and they
+ // (at least at the moment) are never something we would aim to
+ // return in a use_dev_* clause, so they are skipped in terms of
+ // duplicate maps.
+ bool isAttachMap =
+ (mapData.Types[index] &
+ llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ATTACH) ==
+ llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ATTACH;
+ if (!isAttachMap && basePtr == val && mapData.IsAMapping[index] &&
+ memberCount == mapOp.getMembers().size()) {
found = true;
mapData.Types[index] |=
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
@@ -5383,14 +5421,15 @@ static void collectMapDataFromMapOperands(
llvm::Value *origValue = moduleTranslation.lookupValue(offloadPtr);
// Check if map info is already present for this entry.
- if (!findMapInfo(origValue, devInfoTy)) {
+ if (!findMapInfo(origValue, devInfoTy, mapOp.getMembers().size())) {
mapData.OriginalValue.push_back(origValue);
mapData.Pointers.push_back(mapData.OriginalValue.back());
mapData.IsDeclareTarget.push_back(false);
mapData.BasePointers.push_back(mapData.OriginalValue.back());
- mapData.BaseType.push_back(moduleTranslation.convertType(
- mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtrType().value()
- : mapOp.getVarPtrType()));
+ mlir::Type baseTy = mapOp.getVarPtrPtr()
+ ? mapOp.getVarPtrPtrType().value()
+ : mapOp.getVarPtrType();
+ mapData.BaseType.push_back(moduleTranslation.convertType(baseTy));
mapData.Sizes.push_back(builder.getInt64(0));
mapData.MapClause.push_back(mapOp.getOperation());
mapData.Types.push_back(
@@ -5471,7 +5510,7 @@ static int getMapDataMemberIdx(MapInfoData &mapData, omp::MapInfoOp memberOp) {
}
static void sortMapIndices(llvm::SmallVectorImpl<size_t> &indices,
- omp::MapInfoOp mapInfo) {
+ omp::MapInfoOp mapInfo, bool first = true) {
ArrayAttr indexAttr = mapInfo.getMembersIndexAttr();
llvm::SmallVector<size_t> occludedChildren;
llvm::sort(
@@ -5493,10 +5532,10 @@ static void sortMapIndices(llvm::SmallVectorImpl<size_t> &indices,
continue;
if (aIndex < bIndex)
- return true;
+ return first;
if (aIndex > bIndex)
- return false;
+ return !first;
}
// Iterated up until the end of the smallest member and
@@ -5510,9 +5549,6 @@ static void sortMapIndices(llvm::SmallVectorImpl<size_t> &indices,
return memberAParent;
});
- // We remove children from the index list that are overshadowed by
- // a parent, this prevents us retrieving these as the first or last
- // element when the parent is the correct element in these cases.
for (auto v : occludedChildren)
indices.erase(std::remove(indices.begin(), indices.end(), v),
indices.end());
@@ -5526,10 +5562,9 @@ static omp::MapInfoOp getFirstOrLastMappedMemberPtr(omp::MapInfoOp mapInfo,
return cast<omp::MapInfoOp>(mapInfo.getMembers()[0].getDefiningOp());
llvm::SmallVector<size_t> indices(indexAttr.size());
std::iota(indices.begin(), indices.end(), 0);
- sortMapIndices(indices, mapInfo);
+ sortMapIndices(indices, mapInfo, first);
return llvm::cast<omp::MapInfoOp>(
- mapInfo.getMembers()[first ? indices.front() : indices.back()]
- .getDefiningOp());
+ mapInfo.getMembers()[indices.front()].getDefiningOp());
}
/// This function calculates the array/pointer offset for map data provided
@@ -5629,42 +5664,38 @@ getOverlappedMembers(llvm::SmallVectorImpl<size_t> &overlapMapDataIdxs,
return;
}
- // 1) collect list of top-level overlapping members from MemberOp
- llvm::SmallVector<std::pair<int, ArrayAttr>> memberByIndex;
ArrayAttr indexAttr = parentOp.getMembersIndexAttr();
- for (auto [memIndex, indicesAttr] : llvm::enumerate(indexAttr))
- memberByIndex.push_back(
- std::make_pair(memIndex, cast<ArrayAttr>(indicesAttr)));
-
- // Sort the smallest first (higher up the parent -> member chain), so that
- // when we remove members, we remove as much as we can in the initial
- // iterations, shortening the number of passes required.
- llvm::sort(memberByIndex.begin(), memberByIndex.end(),
- [&](auto a, auto b) { return a.second.size() < b.second.size(); });
-
- // Remove elements from the vector if there is a parent element that
- // supersedes it. i.e. if member [0] is mapped, we can remove members [0,1],
- // [0,2].. etc.
- llvm::SmallVector<std::pair<int, ArrayAttr>> skipList;
- for (auto v : memberByIndex) {
- llvm::SmallVector<int64_t> vArr(v.second.size());
- getAsIntegers(v.second, vArr);
- skipList.push_back(
- *std::find_if(memberByIndex.begin(), memberByIndex.end(), [&](auto x) {
- if (v == x)
- return false;
- llvm::SmallVector<int64_t> xArr(x.second.size());
- getAsIntegers(x.second, xArr);
- return std::equal(vArr.begin(), vArr.end(), xArr.begin()) &&
- xArr.size() >= vArr.size();
- }));
- }
-
- // Collect the indices, as we need the base pointer etc. from the MapData
- // structure which is primarily accessible via index at the moment.
- for (auto v : memberByIndex)
- if (find(skipList.begin(), skipList.end(), v) == skipList.end())
- overlapMapDataIdxs.push_back(v.first);
+ size_t numMembers = indexAttr.size();
+
+ // Pre-convert all member indices to integer arrays for efficient comparison.
+ llvm::SmallVector<llvm::SmallVector<int64_t>> memberIndices(numMembers);
+ for (auto [i, indicesAttr] : llvm::enumerate(indexAttr))
+ getAsIntegers(cast<ArrayAttr>(indicesAttr), memberIndices[i]);
+
+ // For each member, check if it's superseded by another (shorter prefix)
+ // member. If member j's indices are a prefix of member i's indices, then
+ // i is a child of j and should be skipped. e.g. if member [0] is mapped,
+ // we skip members [0,1], [0,2], etc.
+ llvm::SmallDenseSet<size_t> skipIndices;
+ for (size_t i = 0; i < numMembers; ++i) {
+ const auto &iIndices = memberIndices[i];
+ for (size_t j = 0; j < numMembers; ++j) {
+ if (i == j)
+ continue;
+ const auto &jIndices = memberIndices[j];
+ // If j's indices are a strict prefix of i's indices, skip i
+ if (jIndices.size() < iIndices.size() &&
+ std::equal(jIndices.begin(), jIndices.end(), iIndices.begin())) {
+ skipIndices.insert(i);
+ break; // No need to check other potential parents
+ }
+ }
+ }
+
+ // Collect indices of members that are not superseded by a parent.
+ for (size_t i = 0; i < numMembers; ++i)
+ if (!skipIndices.contains(i))
+ overlapMapDataIdxs.push_back(i);
}
// The intent is to verify if the mapped data being passed is a
@@ -5688,6 +5719,112 @@ static bool checkIfPointerMap(omp::MapInfoOp mapOp) {
return false;
}
+// This function handles the insertion of a single item of map data from
+// MapInfoData into the OMPIRBuilder's MapInfo list. Utilising this function
+// means the map being inserted can be treated as a non-parent map entity,
+// if the memberOfFlag is set then the map being inserted is treated as
+// a member map of a larger entity. The insertion into the MapInfo list of
+// the OMPIRBuilder can vary based on a number of factors, such as if it's
+// a ref_ptr or ref_ptee map, if it's a member of a record, what construct
+// the map belongs to and the various map type bit flags that are set for
+// the map.
+static void
+processIndividualMap(llvm::IRBuilderBase &builder,
+ llvm::OpenMPIRBuilder &ompBuilder, MapInfoData &mapData,
+ size_t mapDataIdx, MapInfosTy &combinedInfo,
+ TargetDirectiveEnumTy targetDirective,
+ llvm::omp::OpenMPOffloadMappingFlags memberOfFlag =
+ llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE,
+ bool isTargetParam = true, int mapDataParentIdx = -1) {
+ auto mapFlag = mapData.Types[mapDataIdx];
+ auto mapInfoOp = llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIdx]);
+
+ bool isPtrTy = checkIfPointerMap(mapInfoOp);
+ bool isAttachMap = ((convertClauseMapFlags(mapInfoOp.getMapType()) &
+ llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ATTACH) ==
+ llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ATTACH);
+
+ // Declare target variables are not passed to the kernel, and for the moment
+ // attach maps are not passed to the kernel. However, it is possible to create
+ // attach maps that transfer data and thus can be kernel arguments, but our
+ // existing frontend does not do this.
+ if (isTargetParam &&
+ (targetDirective == TargetDirectiveEnumTy::Target &&
+ !mapData.IsDeclareTarget[mapDataIdx]) &&
+ !isAttachMap)
+ mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
+
+ if (mapInfoOp.getMapCaptureType() == omp::VariableCaptureKind::ByCopy &&
+ !isPtrTy)
+ mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
+
+ // If we have a pointer and it's part of a MEMBER_OF mapping we do not apply
+ // MEMBER_OF, as the runtime currently has a work-around that utilises
+ // MEMBER_OF to prevent reference updating in certain scenarios instead of
+ // target_param. However, this causes a noticeable issue in cases where we
+ // map some data (Fortran descriptor primarily at the moment), alter it on
+ // the host, and then expect it to not be updated in a subsequent implicit map
+ // (such as an implicit map on a target).
+ if (memberOfFlag != llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE) {
+ if (!isPtrTy && !isAttachMap)
+ ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
+
+ // The return parameter should be the overriding parent in cases where we
+ // have a return parameter that is echoed to all members, the main case of
+ // this currently is with Fortran descriptors. It may need more finessing
+ // for C/C++ in the future or descriptors that are members of derived
+ // types.
+ mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
+ }
+
+ // We apply MAP_PTR_AND_OBJ when within a declare mapper object as it enforces
+ // MEMBER_OF mappings on maps that are past the initial nesting depth, which
+ // includes pointed to data and attach members, both of which are technically
+ // not part of the main object. This has the side effect of causing early
+ // map-backs in certain cases where an implicit declare mapper has been
+ // emitted for a target region. Applying MAP_PTR_AND_OBJ in these situations
+ // circumvents this.
+ if (isPtrTy && !isAttachMap && mapData.IsDeclareTarget[mapDataIdx])
+ mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
+
+ // If we're provided a mapDataParentIdx, then the data being mapped is
+ // part of a larger object (in a parent <-> member mapping) and in this
+ // case our BasePointer should be the parent. Except in the edge case
+ // where we are mapping pointee data, where we try staying close to
+ // what Clang currently does and utilise the regular base pointer of the
+ // data.
+ bool isRefPtee =
+ !bitEnumContainsAll(mapInfoOp.getMapType(),
+ omp::ClauseMapFlags::ref_ptr) &&
+ bitEnumContainsAll(mapInfoOp.getMapType(), omp::ClauseMapFlags::ref_ptee);
+ bool isRefPtrPtee =
+ bitEnumContainsAll(mapInfoOp.getMapType(),
+ omp::ClauseMapFlags::ref_ptr) &&
+ bitEnumContainsAll(mapInfoOp.getMapType(), omp::ClauseMapFlags::ref_ptee);
+
+ if (!mapInfoOp->getParentOfType<omp::DeclareMapperOp>() &&
+ mapDataParentIdx >= 0 && !(isRefPtee || (isRefPtrPtee && isPtrTy))) {
+ combinedInfo.BasePointers.emplace_back(
+ mapData.BasePointers[mapDataParentIdx]);
+ } else {
+ combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIdx]);
+ }
+
+ combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIdx]);
+ combinedInfo.DevicePointers.emplace_back(
+ memberOfFlag != llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE
+ ? llvm::OpenMPIRBuilder::DeviceInfoTy::None
+ : mapData.DevicePointers[mapDataIdx]);
+ combinedInfo.Mappers.emplace_back(mapData.Mappers[mapDataIdx]);
+ combinedInfo.Names.emplace_back(mapData.Names[mapDataIdx]);
+ combinedInfo.Types.emplace_back(mapFlag);
+ combinedInfo.Sizes.emplace_back(
+ isPtrTy ? builder.CreateSelect(
+ builder.CreateIsNull(mapData.Pointers[mapDataIdx]),
+ builder.getInt64(0), mapData.Sizes[mapDataIdx])
+ : mapData.Sizes[mapDataIdx]);
+}
+
// This creates two insertions into the MapInfosTy data structure for the
// "parent" of a set of members, (usually a container e.g.
// class/structure/derived type) when subsequent members have also been
@@ -5703,38 +5840,43 @@ static bool checkIfPointerMap(omp::MapInfoOp mapOp) {
//
// This function borrows a lot from Clang's emitCombinedEntry function
// inside of CGOpenMPRuntime.cpp
-static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
+static void mapParentWithMembers(
LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo,
MapInfoData &mapData, uint64_t mapDataIndex,
+ llvm::omp::OpenMPOffloadMappingFlags memberOfFlag,
TargetDirectiveEnumTy targetDirective) {
+ using MapFlags = llvm::omp::OpenMPOffloadMappingFlags;
assert(!ompBuilder.Config.isTargetDevice() &&
"function only supported for host device codegen");
-
auto parentClause =
llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
-
auto *parentMapper = mapData.Mappers[mapDataIndex];
// Map the first segment of the parent. If a user-defined mapper is attached,
// include the parent's to/from-style bits (and common modifiers) in this
// base entry so the mapper receives correct copy semantics via its 'type'
// parameter. Also keep TARGET_PARAM when required for kernel arguments.
- llvm::omp::OpenMPOffloadMappingFlags baseFlag =
- (targetDirective == TargetDirectiveEnumTy::Target &&
- !mapData.IsDeclareTarget[mapDataIndex])
- ? llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM
- : llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE;
+ MapFlags baseFlag = (targetDirective == TargetDirectiveEnumTy::Target &&
+ !mapData.IsDeclareTarget[mapDataIndex])
+ ? MapFlags::OMP_MAP_TARGET_PARAM
+ : MapFlags::OMP_MAP_NONE;
if (parentMapper) {
- using mapFlags = llvm::omp::OpenMPOffloadMappingFlags;
// Preserve relevant map-type bits from the parent clause. These include
// the copy direction (TO/FROM), as well as commonly used modifiers that
// should be visible to the mapper for correct behaviour.
- mapFlags parentFlags = mapData.Types[mapDataIndex];
- mapFlags preserve = mapFlags::OMP_MAP_TO | mapFlags::OMP_MAP_FROM |
- mapFlags::OMP_MAP_ALWAYS | mapFlags::OMP_MAP_CLOSE |
- mapFlags::OMP_MAP_PRESENT | mapFlags::OMP_MAP_OMPX_HOLD;
+ MapFlags parentFlags = mapData.Types[mapDataIndex];
+ MapFlags preserve = MapFlags::OMP_MAP_TO | MapFlags::OMP_MAP_FROM |
+ MapFlags::OMP_MAP_ALWAYS | MapFlags::OMP_MAP_CLOSE |
+ MapFlags::OMP_MAP_PRESENT |
+ MapFlags::OMP_MAP_OMPX_HOLD |
+ MapFlags::OMP_MAP_IMPLICIT;
+ baseFlag |= (parentFlags & preserve);
+ } else {
+ MapFlags parentFlags = mapData.Types[mapDataIndex];
+ MapFlags preserve =
+ MapFlags::OMP_MAP_PRESENT | MapFlags::OMP_MAP_RETURN_PARAM;
baseFlag |= (parentFlags & preserve);
}
@@ -5770,15 +5912,34 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
auto mapOp = dyn_cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
int firstMemberIdx = getMapDataMemberIdx(
mapData, getFirstOrLastMappedMemberPtr(mapOp, true));
- lowAddr = builder.CreatePointerCast(mapData.Pointers[firstMemberIdx],
+ lowAddr = builder.CreatePointerCast(mapData.BasePointers[firstMemberIdx],
builder.getPtrTy());
+
int lastMemberIdx = getMapDataMemberIdx(
mapData, getFirstOrLastMappedMemberPtr(mapOp, false));
+ auto lastMemberMapInfo =
+ cast<omp::MapInfoOp>(mapData.MapClause[lastMemberIdx]);
+
+ // NOTE: Currently, for RefPtee the BaseType is set to the varPtrPtr field,
+ // which is the pointer data's type and not the member within the structure
+ // that it's part of, so we have to make sure we use the member type in this
+ // case when calculating the parents size offsets.
+ // TODO: May be good to extend MapInfoData to support tracking of both
+ // VarPtr/VarPtrPtr BaseTypes to better distinguish what's being used more
+ // consistently.
+ bool isRefPteeMap = bitEnumContainsAll(lastMemberMapInfo.getMapType(),
+ omp::ClauseMapFlags::ref_ptee) &&
+ !bitEnumContainsAll(lastMemberMapInfo.getMapType(),
+ omp::ClauseMapFlags::ref_ptr);
+ llvm::Type *castType = mapData.BaseType[lastMemberIdx];
+ if (isRefPteeMap)
+ castType =
+ moduleTranslation.convertType(lastMemberMapInfo.getVarPtrType());
highAddr = builder.CreatePointerCast(
- builder.CreateGEP(mapData.BaseType[lastMemberIdx],
- mapData.Pointers[lastMemberIdx], builder.getInt64(1)),
+ builder.CreateGEP(castType, mapData.BasePointers[lastMemberIdx],
+ builder.getInt64(1)),
builder.getPtrTy());
- combinedInfo.Pointers.emplace_back(mapData.Pointers[firstMemberIdx]);
+ combinedInfo.Pointers.emplace_back(mapData.BasePointers[firstMemberIdx]);
}
llvm::Value *size = builder.CreateIntCast(
@@ -5787,9 +5948,6 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
/*isSigned=*/false);
combinedInfo.Sizes.push_back(size);
- llvm::omp::OpenMPOffloadMappingFlags memberOfFlag =
- ompBuilder.getMemberOfFlag(combinedInfo.BasePointers.size() - 1);
-
// This creates the initial MEMBER_OF mapping that consists of
// the parent/top level container (same as above effectively, except
// with a fixed initial compile time size and separate maptype which
@@ -5801,13 +5959,27 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
// for the map flags that Clang currently supports (e.g. it should do some
// further case specific flag modifications). For the moment, it handles
// what we support as expected.
- llvm::omp::OpenMPOffloadMappingFlags mapFlag = mapData.Types[mapDataIndex];
- bool hasMapClose = (llvm::omp::OpenMPOffloadMappingFlags(mapFlag) &
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE) ==
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
+ MapFlags mapFlag = mapData.Types[mapDataIndex];
+ bool hasMapClose = (MapFlags(mapFlag) & MapFlags::OMP_MAP_CLOSE) ==
+ MapFlags::OMP_MAP_CLOSE;
ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
- if (targetDirective == TargetDirectiveEnumTy::TargetUpdate || hasMapClose) {
+ llvm::SmallVector<size_t> overlapIdxs;
+ // Find all of the members that "overlap", i.e. occlude other members that
+ // were mapped alongside the parent, e.g. member [0], occludes [0,1] and
+ // [0,2], but not [1,0].
+ getOverlappedMembers(overlapIdxs, parentClause);
+
+ // When we only have one overlap we skip the case that tries to segment the
+ // mapping as best it can without creating holes, as the calculation is more
+ // likely to have more overhead than anything we gain from mapping a smaller
+ // chunk of data. This can be seen in cases where we are mapping Fortran
+ // descriptors which are a special case of record type mapping.
+ //
+ // The cases for close and update are unique edge cases where the segmenting
+ // does not play well with the runtime currently.
+ if (targetDirective == TargetDirectiveEnumTy::TargetUpdate || hasMapClose ||
+ overlapIdxs.size() == 1) {
combinedInfo.Types.emplace_back(mapFlag);
combinedInfo.DevicePointers.emplace_back(
mapData.DevicePointers[mapDataIndex]);
@@ -5819,11 +5991,6 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIndex]);
combinedInfo.Mappers.emplace_back(nullptr);
} else {
- llvm::SmallVector<size_t> overlapIdxs;
- // Find all of the members that "overlap", i.e. occlude other members that
- // were mapped alongside the parent, e.g. member [0], occludes [0,1] and
- // [0,2], but not [1,0].
- getOverlappedMembers(overlapIdxs, parentClause);
// We need to make sure the overlapped members are sorted in order of
// lowest address to highest address.
sortMapIndices(overlapIdxs, parentClause);
@@ -5835,6 +6002,13 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
mapData.Pointers[mapDataIndex], 1),
builder.getPtrTy());
+ // Currently, the return parameter should be the overriding parent in
+ // cases where we have a return parameter that is echoed to all members;
+ // the main case of this currently is with Fortran descriptors. It may
+ // need more finessing for C/C++ in the future or descriptors that are
+ // members of derived types.
+ mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
+
// TODO: We may want to skip arrays/array sections in this as Clang does.
// It appears to be an optimisation rather than a necessity though,
// but this requires further investigation. However, we would have to make
@@ -5844,31 +6018,39 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
auto mapDataOverlapIdx = getMapDataMemberIdx(
mapData,
cast<omp::MapInfoOp>(parentClause.getMembers()[v].getDefiningOp()));
+ auto isPtrMap = checkIfPointerMap(
+ llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataOverlapIdx]));
combinedInfo.Types.emplace_back(mapFlag);
combinedInfo.DevicePointers.emplace_back(
- mapData.DevicePointers[mapDataOverlapIdx]);
+ llvm::OpenMPIRBuilder::DeviceInfoTy::None);
combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
combinedInfo.BasePointers.emplace_back(
mapData.BasePointers[mapDataIndex]);
combinedInfo.Mappers.emplace_back(nullptr);
combinedInfo.Pointers.emplace_back(lowAddr);
- combinedInfo.Sizes.emplace_back(builder.CreateIntCast(
+ auto sizeCalc = builder.CreateIntCast(
builder.CreatePtrDiff(builder.getInt8Ty(),
mapData.OriginalValue[mapDataOverlapIdx],
lowAddr),
- builder.getInt64Ty(), /*isSigned=*/true));
+ builder.getInt64Ty(), /*isSigned=*/true);
+ // In certain cases, we'll generate a size of 0 if we're not careful
+ // (e.g. if lowAddr happens to be the first member), which isn't
+ // correct, even if the runtime is sometimes fine with it, so in these
+ // scenarios we select the type's size instead.
+ auto sizeSel = builder.CreateSelect(
+ builder.CreateICmpNE(builder.getInt64(0), sizeCalc), sizeCalc,
+ isPtrMap ? llvm::ConstantExpr::getSizeOf(builder.getPtrTy())
+ : mapData.Sizes[mapDataOverlapIdx]);
+ combinedInfo.Sizes.emplace_back(sizeSel);
lowAddr = builder.CreateConstGEP1_32(
- checkIfPointerMap(llvm::cast<omp::MapInfoOp>(
- mapData.MapClause[mapDataOverlapIdx]))
- ? builder.getPtrTy()
- : mapData.BaseType[mapDataOverlapIdx],
+ isPtrMap ? builder.getPtrTy() : mapData.BaseType[mapDataOverlapIdx],
mapData.BasePointers[mapDataOverlapIdx], 1);
}
combinedInfo.Types.emplace_back(mapFlag);
combinedInfo.DevicePointers.emplace_back(
- mapData.DevicePointers[mapDataIndex]);
+ llvm::OpenMPIRBuilder::DeviceInfoTy::None);
combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
combinedInfo.BasePointers.emplace_back(
@@ -5880,130 +6062,6 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
builder.getInt64Ty(), true));
}
}
- return memberOfFlag;
-}
-
-// This function is intended to add explicit mappings of members
-static void processMapMembersWithParent(
- LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
- llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo,
- MapInfoData &mapData, uint64_t mapDataIndex,
- llvm::omp::OpenMPOffloadMappingFlags memberOfFlag,
- TargetDirectiveEnumTy targetDirective) {
- assert(!ompBuilder.Config.isTargetDevice() &&
- "function only supported for host device codegen");
-
- auto parentClause =
- llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
-
- for (auto mappedMembers : parentClause.getMembers()) {
- auto memberClause =
- llvm::cast<omp::MapInfoOp>(mappedMembers.getDefiningOp());
- int memberDataIdx = getMapDataMemberIdx(mapData, memberClause);
-
- assert(memberDataIdx >= 0 && "could not find mapped member of structure");
-
- // If we're currently mapping a pointer to a block of data, we must
- // initially map the pointer, and then attatch/bind the data with a
- // subsequent map to the pointer. This segment of code generates the
- // pointer mapping, which can in certain cases be optimised out as Clang
- // currently does in its lowering. However, for the moment we do not do so,
- // in part as we currently have substantially less information on the data
- // being mapped at this stage.
- if (checkIfPointerMap(memberClause)) {
- auto mapFlag = convertClauseMapFlags(memberClause.getMapType());
- mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
- mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
- ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
- combinedInfo.Types.emplace_back(mapFlag);
- combinedInfo.DevicePointers.emplace_back(
- llvm::OpenMPIRBuilder::DeviceInfoTy::None);
- combinedInfo.Mappers.emplace_back(nullptr);
- combinedInfo.Names.emplace_back(
- LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
- combinedInfo.BasePointers.emplace_back(
- mapData.BasePointers[mapDataIndex]);
- combinedInfo.Pointers.emplace_back(mapData.BasePointers[memberDataIdx]);
- combinedInfo.Sizes.emplace_back(builder.getInt64(
- moduleTranslation.getLLVMModule()->getDataLayout().getPointerSize()));
- }
-
- // Same MemberOfFlag to indicate its link with parent and other members
- // of.
- auto mapFlag = convertClauseMapFlags(memberClause.getMapType());
- mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
- mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
- ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
- bool isDeclTargetTo = isDeclareTargetTo(parentClause.getVarPtr()
- ? parentClause.getVarPtr()
- : parentClause.getVarPtrPtr());
- if (checkIfPointerMap(memberClause) &&
- (!isDeclTargetTo ||
- (targetDirective != TargetDirectiveEnumTy::TargetUpdate &&
- targetDirective != TargetDirectiveEnumTy::TargetData))) {
- mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
- }
-
- combinedInfo.Types.emplace_back(mapFlag);
- combinedInfo.DevicePointers.emplace_back(
- mapData.DevicePointers[memberDataIdx]);
- combinedInfo.Mappers.emplace_back(mapData.Mappers[memberDataIdx]);
- combinedInfo.Names.emplace_back(
- LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
- uint64_t basePointerIndex =
- checkIfPointerMap(memberClause) ? memberDataIdx : mapDataIndex;
- combinedInfo.BasePointers.emplace_back(
- mapData.BasePointers[basePointerIndex]);
- combinedInfo.Pointers.emplace_back(mapData.Pointers[memberDataIdx]);
-
- llvm::Value *size = mapData.Sizes[memberDataIdx];
- if (checkIfPointerMap(memberClause)) {
- size = builder.CreateSelect(
- builder.CreateIsNull(mapData.Pointers[memberDataIdx]),
- builder.getInt64(0), size);
- }
-
- combinedInfo.Sizes.emplace_back(size);
- }
-}
-
-static void processIndividualMap(MapInfoData &mapData, size_t mapDataIdx,
- MapInfosTy &combinedInfo,
- TargetDirectiveEnumTy targetDirective,
- int mapDataParentIdx = -1) {
- // Declare Target Mappings are excluded from being marked as
- // OMP_MAP_TARGET_PARAM as they are not passed as parameters, they're
- // marked with OMP_MAP_PTR_AND_OBJ instead.
- auto mapFlag = mapData.Types[mapDataIdx];
- auto mapInfoOp = llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIdx]);
-
- bool isPtrTy = checkIfPointerMap(mapInfoOp);
- if (isPtrTy)
- mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
-
- if (targetDirective == TargetDirectiveEnumTy::Target &&
- !mapData.IsDeclareTarget[mapDataIdx])
- mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
-
- if (mapInfoOp.getMapCaptureType() == omp::VariableCaptureKind::ByCopy &&
- !isPtrTy)
- mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
-
- // if we're provided a mapDataParentIdx, then the data being mapped is
- // part of a larger object (in a parent <-> member mapping) and in this
- // case our BasePointer should be the parent.
- if (mapDataParentIdx >= 0)
- combinedInfo.BasePointers.emplace_back(
- mapData.BasePointers[mapDataParentIdx]);
- else
- combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIdx]);
-
- combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIdx]);
- combinedInfo.DevicePointers.emplace_back(mapData.DevicePointers[mapDataIdx]);
- combinedInfo.Mappers.emplace_back(mapData.Mappers[mapDataIdx]);
- combinedInfo.Names.emplace_back(mapData.Names[mapDataIdx]);
- combinedInfo.Types.emplace_back(mapFlag);
- combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIdx]);
}
static void processMapWithMembersOf(LLVM::ModuleTranslation &moduleTranslation,
@@ -6035,18 +6093,42 @@ static void processMapWithMembersOf(LLVM::ModuleTranslation &moduleTranslation,
// Clang maps array without bounds as pointers (which we do not
// currently do), whereas we treat them as arrays in all cases
// currently.
- processIndividualMap(mapData, memberDataIdx, combinedInfo, targetDirective,
- mapDataIndex);
+ processIndividualMap(
+ builder, ompBuilder, mapData, memberDataIdx, combinedInfo,
+ targetDirective,
+ /*MemberOfFlag=*/llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE,
+ /*isTargetParam=*/true, mapDataIndex);
return;
}
- llvm::omp::OpenMPOffloadMappingFlags memberOfParentFlag =
+ auto collectMapInfoIdxs =
+ [&](llvm::SmallVectorImpl<int64_t> &mapsAndInfoIdx) {
+ auto parentClause =
+ llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
+ mapsAndInfoIdx.push_back(getMapDataMemberIdx(mapData, parentClause));
+ for (auto member : parentClause.getMembers())
+ mapsAndInfoIdx.push_back(getMapDataMemberIdx(
+ mapData, llvm::cast<omp::MapInfoOp>(member.getDefiningOp())));
+ };
+
+ llvm::SmallVector<int64_t> mapInfoIdx;
+ collectMapInfoIdxs(mapInfoIdx);
+
+ llvm::omp::OpenMPOffloadMappingFlags memberOfFlag =
+ ompBuilder.getMemberOfFlag(combinedInfo.Types.size());
+ for (size_t i = 0; i < mapInfoIdx.size(); i++) {
+ // Index == 0 is the parent map and if it gets here it's an unattachable
+ // type and should have OMP_MAP_TARGET_PARAM applied and no MEMBER_OF flag.
+ if (i == 0) {
mapParentWithMembers(moduleTranslation, builder, ompBuilder, dl,
- combinedInfo, mapData, mapDataIndex,
+ combinedInfo, mapData, mapInfoIdx[i], memberOfFlag,
targetDirective);
- processMapMembersWithParent(moduleTranslation, builder, ompBuilder, dl,
- combinedInfo, mapData, mapDataIndex,
- memberOfParentFlag, targetDirective);
+ } else {
+ processIndividualMap(builder, ompBuilder, mapData, mapInfoIdx[i],
+ combinedInfo, targetDirective, memberOfFlag,
+ /*isTargetParam=*/false, mapDataIndex);
+ }
+ }
}
// This is a variation on Clang's GenerateOpenMPCapturedVars, which
@@ -6060,9 +6142,17 @@ createAlteredByCaptureMap(MapInfoData &mapData,
assert(!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() &&
"function only supported for host device codegen");
for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
- // if it's declare target, skip it, it's handled separately.
- if (!mapData.IsDeclareTarget[i]) {
- auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
+ auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
+ bool isAttachMap =
+ ((convertClauseMapFlags(mapOp.getMapType()) &
+ llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ATTACH) ==
+ llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ATTACH);
+
+ // If it's declare target, skip it, it's handled separately. However, if
+ // it's declare target, and an attach map, we want to calculate the exact
+ // address offset so that we attach correctly.
+ if (!mapData.IsDeclareTarget[i] ||
+ (mapData.IsDeclareTarget[i] && isAttachMap)) {
omp::VariableCaptureKind captureKind = mapOp.getMapCaptureType();
bool isPtrTy = checkIfPointerMap(mapOp);
@@ -6149,8 +6239,6 @@ static void genMapInfos(llvm::IRBuilderBase &builder,
// utilise the size from any component of MapInfoData, if we can't
// something is missing from the initial MapInfoData construction.
for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
- // NOTE/TODO: We currently do not support arbitrary depth record
- // type mapping.
if (mapData.IsAMember[i])
continue;
@@ -6161,7 +6249,8 @@ static void genMapInfos(llvm::IRBuilderBase &builder,
continue;
}
- processIndividualMap(mapData, i, combinedInfo, targetDirective);
+ processIndividualMap(builder, *ompBuilder, mapData, i, combinedInfo,
+ targetDirective);
}
}
@@ -6249,7 +6338,8 @@ emitUserDefinedMapper(Operation *op, llvm::IRBuilderBase &builder,
};
llvm::Expected<llvm::Function *> newFn = ompBuilder->emitUserDefinedMapper(
- genMapInfoCB, varType, mapperFuncName, customMapperCB);
+ genMapInfoCB, varType, mapperFuncName, customMapperCB,
+ /*PreserveMemberOfFlags=*/true);
if (!newFn)
return newFn.takeError();
if ([[maybe_unused]] llvm::Function *mappedFunc =
@@ -6719,40 +6809,43 @@ handleDeclareTargetMapVar(MapInfoData &mapData,
// function to link the two variables in the runtime and then both the
// reference pointer and the pointer are assigned in the kernel argument
// structure for the host.
- if (mapData.IsDeclareTarget[i]) {
- // If the original map value is a constant, then we have to make sure all
- // of it's uses within the current kernel/function that we are going to
- // rewrite are converted to instructions, as we will be altering the old
- // use (OriginalValue) from a constant to an instruction, which will be
- // illegal and ICE the compiler if the user is a constant expression of
- // some kind e.g. a constant GEP.
- if (auto *constant = dyn_cast<llvm::Constant>(mapData.OriginalValue[i]))
- convertUsersOfConstantsToInstructions(constant, func, false);
-
- // The users iterator will get invalidated if we modify an element,
- // so we populate this vector of uses to alter each user on an
- // individual basis to emit its own load (rather than one load for
- // all).
- llvm::SmallVector<llvm::User *> userVec;
- for (llvm::User *user : mapData.OriginalValue[i]->users())
- userVec.push_back(user);
-
- for (llvm::User *user : userVec) {
- if (auto *insn = dyn_cast<llvm::Instruction>(user)) {
- if (insn->getFunction() == func) {
- auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
- llvm::Value *substitute = mapData.BasePointers[i];
- if (isDeclareTargetLink(mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr()
- : mapOp.getVarPtr())) {
- builder.SetCurrentDebugLocation(insn->getDebugLoc());
- substitute = builder.CreateLoad(
- mapData.BasePointers[i]->getType(), mapData.BasePointers[i]);
- cast<llvm::LoadInst>(substitute)->moveBefore(insn->getIterator());
- }
- user->replaceUsesOfWith(mapData.OriginalValue[i], substitute);
- }
- }
+ if (!mapData.IsDeclareTarget[i])
+ continue;
+ // If the original map value is a constant, then we have to make sure all
+ // of its uses within the current kernel/function that we are going to
+ // rewrite are converted to instructions, as we will be altering the old
+ // use (OriginalValue) from a constant to an instruction, which will be
+ // illegal and ICE the compiler if the user is a constant expression of
+ // some kind e.g. a constant GEP.
+ if (auto *constant = dyn_cast<llvm::Constant>(mapData.OriginalValue[i]))
+ convertUsersOfConstantsToInstructions(constant, func, false);
+
+ // The users iterator will get invalidated if we modify an element,
+ // so we populate this vector of uses to alter each user on an
+ // individual basis to emit its own load (rather than one load for
+ // all).
+ llvm::SmallVector<llvm::User *> userVec;
+ for (llvm::User *user : mapData.OriginalValue[i]->users())
+ userVec.push_back(user);
+
+ for (llvm::User *user : userVec) {
+ auto *insn = dyn_cast<llvm::Instruction>(user);
+ if (!insn || insn->getFunction() != func)
+ continue;
+ auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
+ llvm::Value *substitute = mapData.BasePointers[i];
+ auto declTarPtr =
+ mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
+ if (isDeclareTargetLink(declTarPtr) ||
+ (isDeclareTargetTo(declTarPtr) &&
+ moduleTranslation.getOpenMPBuilder()
+ ->Config.hasRequiresUnifiedSharedMemory())) {
+ builder.SetCurrentDebugLocation(insn->getDebugLoc());
+ substitute = builder.CreateLoad(mapData.BasePointers[i]->getType(),
+ mapData.BasePointers[i]);
+ cast<llvm::LoadInst>(substitute)->moveBefore(insn->getIterator());
}
+ user->replaceUsesOfWith(mapData.OriginalValue[i], substitute);
}
}
}
@@ -6823,7 +6916,7 @@ static llvm::IRBuilderBase::InsertPoint createDeviceArgumentAccessor(
capture = mapOp.getMapCaptureType();
// Get information of alignment of mapped object
alignmentValue = typeToLLVMIRTranslator.getPreferredAlignment(
- mapOp.getVarType(), ompBuilder.M.getDataLayout());
+ mapOp.getVarPtrType(), ompBuilder.M.getDataLayout());
// Find the corresponding entry block argument, which can be associated to
// a map, use_device* or has_device* clause.
@@ -7562,13 +7655,18 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
}
for (size_t i = 0, e = mapData.OriginalValue.size(); i != e; ++i) {
- // declare target arguments are not passed to kernels as arguments
+ // 1) Declare target arguments are not passed to kernels as arguments.
+ // 2) Attach maps are not passed in as arguments to kernels.
+ // 3) Children of record objects are not passed in as arguments.
// TODO: We currently do not handle cases where a member is explicitly
// passed in as an argument, this will likley need to be handled in
// the near future, rather than using IsAMember, it may be better to
// test if the relevant BlockArg is used within the target region and
// then use that as a basis for exclusion in the kernel inputs.
- if (!mapData.IsDeclareTarget[i] && !mapData.IsAMember[i])
+ bool isAttachMap = (mapData.Types[i] &
+ llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ATTACH) ==
+ llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ATTACH;
+ if (!mapData.IsDeclareTarget[i] && !mapData.IsAMember[i] && !isAttachMap)
kernelInput.push_back(mapData.OriginalValue[i]);
}
diff --git a/mlir/test/Target/LLVMIR/allocatable_gpu_reduction_teams.mlir b/mlir/test/Target/LLVMIR/allocatable_gpu_reduction_teams.mlir
index e0471f6f303fd..afa07c93851df 100644
--- a/mlir/test/Target/LLVMIR/allocatable_gpu_reduction_teams.mlir
+++ b/mlir/test/Target/LLVMIR/allocatable_gpu_reduction_teams.mlir
@@ -45,7 +45,8 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<"dlti.alloca_memory_space" = 5 :
%5 = llvm.addrspacecast %4 : !llvm.ptr<5> to !llvm.ptr
%8 = llvm.getelementptr %5[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
%9 = omp.map.info var_ptr(%5 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%8 : !llvm.ptr, f32) -> !llvm.ptr {name = ""}
- %10 = omp.map.info var_ptr(%5 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(always, descriptor, to, attach) capture(ByRef) members(%9 : [0] : !llvm.ptr) -> !llvm.ptr {name = "scalar_alloc"}
+ %10 = omp.map.info var_ptr(%5 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(always, descriptor, to) capture(ByRef) members(%9 : [0] : !llvm.ptr) -> !llvm.ptr {name = "scalar_alloc"}
+ %attach = omp.map.info var_ptr(%5 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(ref_ptr_ptee, attach) capture(ByRef) var_ptr_ptr(%8 : !llvm.ptr, f32) -> !llvm.ptr {name = "scalar_alloc"}
omp.target map_entries(%10 -> %arg0 : !llvm.ptr) {
%14 = llvm.mlir.constant(1000000 : i32) : i32
%15 = llvm.mlir.constant(1 : i32) : i32
diff --git a/mlir/test/Target/LLVMIR/omptarget-data-use-dev-ordering.mlir b/mlir/test/Target/LLVMIR/omptarget-data-use-dev-ordering.mlir
index ab59b597846bb..121c6ee83b85f 100644
--- a/mlir/test/Target/LLVMIR/omptarget-data-use-dev-ordering.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-data-use-dev-ordering.mlir
@@ -15,7 +15,7 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
%5 = omp.map.info var_ptr(%arg1 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
%6 = omp.map.info var_ptr(%arg2 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%arg3 : !llvm.ptr, i32) bounds(%3) -> !llvm.ptr
%7 = omp.map.info var_ptr(%arg2 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) members(%6 : [0] : !llvm.ptr) -> !llvm.ptr
- %8 = omp.map.info var_ptr(%arg4 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%arg5 : !llvm.ptr, i32) -> !llvm.ptr
+ %8 = omp.map.info var_ptr(%arg4 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%arg5 : !llvm.ptr, f32) -> !llvm.ptr
%9 = omp.map.info var_ptr(%arg4 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) members(%8 : [0] : !llvm.ptr) -> !llvm.ptr
%10 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(i64)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
omp.target_data map_entries(%4, %5 : !llvm.ptr, !llvm.ptr) use_device_addr(%7 -> %arg6, %9 -> %arg7, %6 -> %arg8, %8 -> %arg9 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) use_device_ptr(%10 -> %arg10 : !llvm.ptr) {
@@ -67,23 +67,19 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
// CHECK: define void @mix_use_device_ptr_and_addr_and_map_(ptr %[[ARG_0:.*]], ptr %[[ARG_1:.*]], ptr %[[ARG_2:.*]], ptr %[[ARG_3:.*]], ptr %[[ARG_4:.*]], ptr %[[ARG_5:.*]], ptr %[[ARG_6:.*]], ptr %[[ARG_7:.*]]) {
// CHECK: %[[ALLOCA:.*]] = alloca ptr, align 8
-// CHECK: %[[BASEPTR_0_GEP:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK: %[[BASEPTR_0_GEP:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
// CHECK: store ptr %[[ARG_0]], ptr %[[BASEPTR_0_GEP]], align 8
-// CHECK: %[[BASEPTR_2_GEP:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 4
+// CHECK: %[[BASEPTR_2_GEP:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 4
// CHECK: store ptr %[[ARG_2]], ptr %[[BASEPTR_2_GEP]], align 8
-// CHECK: %[[BASEPTR_3_GEP:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 9
-// CHECK: store ptr %[[ARG_4]], ptr %[[BASEPTR_3_GEP]], align 8
+// CHECK: %[[BASEPTR_3_GEP:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 7
// CHECK: call void @__tgt_target_data_begin_mapper({{.*}})
// CHECK: %[[LOAD_BASEPTR_0:.*]] = load ptr, ptr %[[BASEPTR_0_GEP]], align 8
// store ptr %[[LOAD_BASEPTR_0]], ptr %[[ALLOCA]], align 8
-// CHECK: %[[LOAD_BASEPTR_2:.*]] = load ptr, ptr %[[BASEPTR_2_GEP]], align 8
-// CHECK: %[[LOAD_BASEPTR_3:.*]] = load ptr, ptr %[[BASEPTR_3_GEP]], align 8
// CHECK: %[[GEP_A4:.*]] = getelementptr { i64 }, ptr %[[ARG_4]], i32 0, i32 0
// CHECK: %[[GEP_A7:.*]] = getelementptr { i64 }, ptr %[[ARG_7]], i32 0, i32 0
// CHECK: %[[LOAD_A4:.*]] = load i64, ptr %[[GEP_A4]], align 4
// CHECK: store i64 %[[LOAD_A4]], ptr %[[GEP_A7]], align 4
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[ARG_6]], ptr %[[LOAD_BASEPTR_2]], i32 48, i1 false)
// CHECK: %[[GEP_A6:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[ARG_6]], i32 0, i32 0
// CHECK: %[[LOAD_A6:.*]] = load ptr, ptr %[[GEP_A6]], align 8
// CHECK: %[[GEP_A6_2:.*]] = getelementptr i8, ptr %[[LOAD_A6]], i64 2
@@ -93,22 +89,19 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
// CHECK: define void @mix_use_device_ptr_and_addr_and_map_2(ptr %[[ARG_0:.*]], ptr %[[ARG_1:.*]], ptr %[[ARG_2:.*]], ptr %[[ARG_3:.*]], ptr %[[ARG_4:.*]], ptr %[[ARG_5:.*]], ptr %[[ARG_6:.*]], ptr %[[ARG_7:.*]]) {
// CHECK: %[[ALLOCA:.*]] = alloca ptr, align 8
-// CHECK: %[[BASEPTR_1_GEP:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK: %[[BASEPTR_1_GEP:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
// CHECK: store ptr %[[ARG_0]], ptr %[[BASEPTR_1_GEP]], align 8
-// CHECK: %[[BASEPTR_2_GEP:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 4
+// CHECK: %[[BASEPTR_2_GEP:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 4
// CHECK: store ptr %[[ARG_2]], ptr %[[BASEPTR_2_GEP]], align 8
-// CHECK: %[[BASEPTR_3_GEP:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 9
+// CHECK: %[[BASEPTR_3_GEP:.*]] = getelementptr inbounds [8 x ptr], ptr %.offload_baseptrs, i32 0, i32 7
// CHECK: store ptr %[[ARG_4]], ptr %[[BASEPTR_3_GEP]], align 8
// CHECK: call void @__tgt_target_data_begin_mapper({{.*}})
// CHECK: %[[LOAD_BASEPTR_1:.*]] = load ptr, ptr %[[BASEPTR_1_GEP]], align 8
// store ptr %[[LOAD_BASEPTR_1]], ptr %[[ALLOCA]], align 8
-// CHECK: %[[LOAD_BASEPTR_2:.*]] = load ptr, ptr %[[BASEPTR_2_GEP]], align 8
-// CHECK: %[[LOAD_BASEPTR_3:.*]] = load ptr, ptr %[[BASEPTR_3_GEP]], align 8
// CHECK: %[[GEP_A4:.*]] = getelementptr { i64 }, ptr %[[ARG_4]], i32 0, i32 0
// CHECK: %[[GEP_A7:.*]] = getelementptr { i64 }, ptr %[[ARG_7]], i32 0, i32 0
// CHECK: %[[LOAD_A4:.*]] = load i64, ptr %[[GEP_A4]], align 4
// CHECK: store i64 %[[LOAD_A4]], ptr %[[GEP_A7]], align 4
-// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[ARG_6]], ptr %[[LOAD_BASEPTR_2]], i32 48, i1 false)
// CHECK: %[[GEP_A6:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[ARG_6]], i32 0, i32 0
// CHECK: %[[LOAD_A6:.*]] = load ptr, ptr %[[GEP_A6]], align 8
// CHECK: %[[GEP_A6_2:.*]] = getelementptr i8, ptr %[[LOAD_A6]], i64 2
diff --git a/mlir/test/Target/LLVMIR/omptarget-host-ref-semantics.mlir b/mlir/test/Target/LLVMIR/omptarget-host-ref-semantics.mlir
new file mode 100644
index 0000000000000..61824cc0a39e1
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-host-ref-semantics.mlir
@@ -0,0 +1,273 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// Tests that we correctly lower the different combinations of the ref_ptr,
+// ref_ptee and attach map clauses, with and without an explicit attach map.
+
+module attributes {omp.is_gpu = false, omp.is_target_device = false, omp.requires = #omp<clause_requires none>, omp.target_triples = ["amdgcn-amd-amdhsa"], omp.version = #omp.version<version = 61>} {
+ llvm.func @attach_always_(%arg0: !llvm.ptr, %arg1: !llvm.ptr) { // Case: the attach map (%map3) carries the 'always' modifier.
+ %map1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%arg1 : !llvm.ptr, i32) -> !llvm.ptr {name = ""}
+ %map2 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(always, to) capture(ByRef) members(%map1 : [0] : !llvm.ptr) -> !llvm.ptr {name = "x"}
+ %map3 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(always, attach, ref_ptr, ref_ptee) capture(ByRef) var_ptr_ptr(%arg1 : !llvm.ptr, i32) -> !llvm.ptr {name = "x"}
+ omp.target map_entries(%map2 -> %arg2, %map3 -> %arg3, %map1 -> %arg4 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+ omp.terminator
+ }
+ llvm.return
+ }
+
+ llvm.func @attach_never_(%arg0: !llvm.ptr, %arg1: !llvm.ptr) { // Case: parent + member maps only; no attach map is passed to omp.target.
+ %map1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%arg1 : !llvm.ptr, i32) -> !llvm.ptr {name = ""}
+ %map2 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(always, to) capture(ByRef) members(%map1 : [0] : !llvm.ptr) -> !llvm.ptr {name = "x"}
+ omp.target map_entries(%map2 -> %arg2, %map1 -> %arg3 : !llvm.ptr, !llvm.ptr) {
+ omp.terminator
+ }
+ llvm.return
+ }
+
+ llvm.func @attach_auto_(%arg0: !llvm.ptr, %arg1: !llvm.ptr) { // Case: the attach map (%map3) has no 'always' modifier.
+ %map1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%arg1 : !llvm.ptr, i32) -> !llvm.ptr {name = ""}
+ %map2 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(always, to) capture(ByRef) members(%map1 : [0] : !llvm.ptr) -> !llvm.ptr {name = "x"}
+ %map3 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(attach, ref_ptr, ref_ptee) capture(ByRef) var_ptr_ptr(%arg1 : !llvm.ptr, i32) -> !llvm.ptr {name = "x"}
+ omp.target map_entries(%map2 -> %arg2, %map3 -> %arg3, %map1 -> %arg4 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+ omp.terminator
+ }
+ llvm.return
+ }
+
+ llvm.func @ref_ptr_ptee_(%arg0: !llvm.ptr, %arg1: !llvm.ptr) { // Case: the 'to' maps and the attach map all carry both ref_ptr and ref_ptee.
+ %map1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(to, ref_ptr, ref_ptee) capture(ByRef) var_ptr_ptr(%arg1 : !llvm.ptr, i32) -> !llvm.ptr {name = ""}
+ %map2 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(to, ref_ptr, ref_ptee) capture(ByRef) members(%map1 : [0] : !llvm.ptr) -> !llvm.ptr {name = "x"}
+ %map3 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(attach, ref_ptr, ref_ptee) capture(ByRef) var_ptr_ptr(%arg1 : !llvm.ptr, i32) -> !llvm.ptr {name = "x"}
+ omp.target map_entries(%map2 -> %arg2, %map3 -> %arg3, %map1 -> %arg4 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+ omp.terminator
+ }
+ llvm.return
+ }
+
+ llvm.func @ref_ptr_(%arg0: !llvm.ptr, %arg1: !llvm.ptr) { // Case: both maps carry ref_ptr only; %map1 has no var_ptr_ptr operand.
+ %map1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(to, ref_ptr) capture(ByRef) -> !llvm.ptr {name = ""}
+ %map2 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(attach, ref_ptr) capture(ByRef) var_ptr_ptr(%arg1 : !llvm.ptr, i32) -> !llvm.ptr {name = "x"}
+ omp.target map_entries(%map2 -> %arg2, %map1 -> %arg3 : !llvm.ptr, !llvm.ptr) {
+ omp.terminator
+ }
+ llvm.return
+ }
+
+ llvm.func @ref_ptee_(%arg0: !llvm.ptr, %arg1: !llvm.ptr) { // Case: both maps carry ref_ptee only, each with a var_ptr_ptr operand.
+ %map1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(to, ref_ptee) capture(ByRef) var_ptr_ptr(%arg1 : !llvm.ptr, i32) -> !llvm.ptr {name = ""}
+ %map2 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(attach, ref_ptee) capture(ByRef) var_ptr_ptr(%arg1 : !llvm.ptr, i32) -> !llvm.ptr {name = "x"}
+ omp.target map_entries(%map2 -> %arg2, %map1 -> %arg3 : !llvm.ptr, !llvm.ptr) {
+ omp.terminator
+ }
+ llvm.return
+ }
+
+ llvm.func @ref_ptr_ptee_attach_never_(%arg0: !llvm.ptr, %arg1: !llvm.ptr) { // Case: ref_ptr + ref_ptee 'to' maps; no attach map is passed to omp.target.
+ %map1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(to, ref_ptr, ref_ptee) capture(ByRef) var_ptr_ptr(%arg1 : !llvm.ptr, i32) -> !llvm.ptr {name = ""}
+ %map2 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(to, ref_ptr, ref_ptee) capture(ByRef) members(%map1 : [0] : !llvm.ptr) -> !llvm.ptr {name = "x"}
+ omp.target map_entries(%map2 -> %arg2, %map1 -> %arg3 : !llvm.ptr, !llvm.ptr) {
+ omp.terminator
+ }
+ llvm.return
+ }
+}
+
+// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976710661, i64 3, i64 16388, i64 288]
+// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710661, i64 3, i64 288]
+// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976710661, i64 3, i64 16384, i64 288]
+// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976710657, i64 1, i64 16384, i64 288]
+// CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [3 x i64] [i64 0, i64 24, i64 0]
+// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [3 x i64] [i64 16384, i64 33, i64 288]
+// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [3 x i64] [i64 16384, i64 33, i64 288]
+// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710657, i64 1, i64 288]
+
+// CHECK: define void @attach_always_(ptr %[[ARG0:.*]], ptr %[[ARG1:.*]])
+// CHECK: %[[VAL_0:.*]] = load ptr, ptr %[[ARG1]], align 8
+// CHECK: %[[VAL_1:.*]] = load ptr, ptr %[[ARG1]], align 8
+// CHECK: %[[VAL_2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[ARG0]], i32 1
+// CHECK: %[[VAL_3:.*]] = ptrtoaddr ptr %[[VAL_2]] to i64
+// CHECK: %[[VAL_4:.*]] = ptrtoaddr ptr %[[ARG0]] to i64
+// CHECK: %[[VAL_5:.*]] = sub i64 %[[VAL_3]], %[[VAL_4]]
+// CHECK: %[[VAL_1_CMP:.*]] = icmp eq ptr %[[VAL_1]], null
+// CHECK: %[[VAL_1_SEL:.*]] = select i1 %[[VAL_1_CMP]], i64 0, i64 4
+// CHECK: %[[VAL_2_CMP:.*]] = icmp eq ptr %[[VAL_0]], null
+// CHECK: %[[VAL_2_SEL:.*]] = select i1 %[[VAL_2_CMP]], i64 0, i64 24
+
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK: store ptr %[[ARG0]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK: store i64 %[[VAL_5]], ptr %[[SIZES]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK: store ptr %[[ARG0]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 2
+// CHECK: store ptr %[[VAL_1]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 2
+// CHECK: store i64 %[[VAL_1_SEL]], ptr %[[SIZES]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 3
+// CHECK: store ptr %[[VAL_0]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 3
+// CHECK: store i64 %[[VAL_2_SEL]], ptr %[[SIZES]], align 8
+
+
+
+// CHECK: define void @attach_never_(ptr %[[ARG0:.*]], ptr %[[ARG1:.*]])
+// CHECK: %[[VAL_1:.*]] = load ptr, ptr %[[ARG1]], align 8
+// CHECK: %[[VAL_2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[ARG0]], i32 1
+// CHECK: %[[VAL_3:.*]] = ptrtoaddr ptr %[[VAL_2]] to i64
+// CHECK: %[[VAL_4:.*]] = ptrtoaddr ptr %[[ARG0]] to i64
+// CHECK: %[[VAL_5:.*]] = sub i64 %[[VAL_3]], %[[VAL_4]]
+// CHECK: %[[VAL_7:.*]] = icmp eq ptr %[[VAL_1]], null
+// CHECK: %[[VAL_8:.*]] = select i1 %[[VAL_7]], i64 0, i64 4
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK: store ptr %[[ARG0]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK: store i64 %[[VAL_5]], ptr %[[SIZES]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK: store ptr %[[ARG0]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 2
+// CHECK: store ptr %[[VAL_1]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 2
+// CHECK: store i64 %[[VAL_8]], ptr %[[SIZES]], align 8
+
+
+// CHECK: define void @attach_auto_(ptr %[[ARG0:.*]], ptr %[[ARG1:.*]])
+// CHECK: %[[VAL_0:.*]] = load ptr, ptr %[[ARG1]], align 8
+// CHECK: %[[VAL_1:.*]] = load ptr, ptr %[[ARG1]], align 8
+// CHECK: %[[VAL_2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[ARG0]], i32 1
+// CHECK: %[[VAL_3:.*]] = ptrtoaddr ptr %[[VAL_2]] to i64
+// CHECK: %[[VAL_4:.*]] = ptrtoaddr ptr %[[ARG0]] to i64
+// CHECK: %[[VAL_5:.*]] = sub i64 %[[VAL_3]], %[[VAL_4]]
+// CHECK: %[[VAL_1_CMP:.*]] = icmp eq ptr %[[VAL_1]], null
+// CHECK: %[[VAL_1_SEL:.*]] = select i1 %[[VAL_1_CMP]], i64 0, i64 4
+// CHECK: %[[VAL_2_CMP:.*]] = icmp eq ptr %[[VAL_0]], null
+// CHECK: %[[VAL_2_SEL:.*]] = select i1 %[[VAL_2_CMP]], i64 0, i64 24
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK: store ptr %[[ARG0]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK: store i64 %[[VAL_5]], ptr %[[SIZES]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK: store ptr %[[ARG0]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 2
+// CHECK: store ptr %[[VAL_1]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 2
+// CHECK: store i64 %[[VAL_1_SEL]], ptr %[[SIZES]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 3
+// CHECK: store ptr %[[VAL_0]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 3
+// CHECK: store i64 %[[VAL_2_SEL]], ptr %[[SIZES]], align 8
+
+
+// CHECK: define void @ref_ptr_ptee_(ptr %[[ARG0:.*]], ptr %[[ARG1:.*]])
+// CHECK: %[[VAL_0:.*]] = load ptr, ptr %[[ARG1]], align 8
+// CHECK: %[[VAL_1:.*]] = load ptr, ptr %[[ARG1]], align 8
+// CHECK: %[[VAL_2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[ARG0]], i32 1
+// CHECK: %[[VAL_3:.*]] = ptrtoaddr ptr %[[VAL_2]] to i64
+// CHECK: %[[VAL_4:.*]] = ptrtoaddr ptr %[[ARG0]] to i64
+// CHECK: %[[VAL_5:.*]] = sub i64 %[[VAL_3]], %[[VAL_4]]
+// CHECK: %[[VAL_1_CMP:.*]] = icmp eq ptr %[[VAL_1]], null
+// CHECK: %[[VAL_1_SEL:.*]] = select i1 %[[VAL_1_CMP]], i64 0, i64 4
+// CHECK: %[[VAL_2_CMP:.*]] = icmp eq ptr %[[VAL_0]], null
+// CHECK: %[[VAL_2_SEL:.*]] = select i1 %[[VAL_2_CMP]], i64 0, i64 24
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK: store ptr %[[ARG0]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK: store i64 %[[VAL_5]], ptr %[[SIZES]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK: store ptr %[[ARG0]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
+// CHECK: store ptr %[[ARG1]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 2
+// CHECK: store ptr %[[VAL_1]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 2
+// CHECK: store i64 %[[VAL_1_SEL]], ptr %[[SIZES]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 3
+// CHECK: store ptr %[[VAL_0]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 3
+// CHECK: store i64 %[[VAL_2_SEL]], ptr %[[SIZES]], align 8
+
+// CHECK: define void @ref_ptr_(ptr %[[ARG0:.*]], ptr %[[ARG1:.*]])
+// CHECK: %[[VAL_0:.*]] = load ptr, ptr %[[ARG1]], align 8
+// CHECK: %[[VAL_1:.*]] = icmp eq ptr %[[VAL_0]], null
+// CHECK: %[[VAL_2:.*]] = select i1 %[[VAL_1]], i64 0, i64 24
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK: store ptr %[[VAL_0]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [3 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK: store i64 %[[VAL_2]], ptr %[[SIZES]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK: store ptr %[[ARG0]], ptr %[[OFFPTRS]], align 8
+
+// CHECK: define void @ref_ptee_(ptr %[[ARG0:.*]], ptr %[[ARG1:.*]])
+// CHECK: %[[VAL_0:.*]] = load ptr, ptr %[[ARG1]], align 8
+// CHECK: %[[VAL_1:.*]] = load ptr, ptr %[[ARG1]], align 8
+// CHECK: %[[VAL_2:.*]] = icmp eq ptr %[[VAL_0]], null
+// CHECK: %[[VAL_3:.*]] = select i1 %[[VAL_2]], i64 0, i64 24
+// CHECK: %[[VAL_4:.*]] = icmp eq ptr %[[VAL_1]], null
+// CHECK: %[[VAL_5:.*]] = select i1 %[[VAL_4]], i64 0, i64 4
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK: store ptr %[[VAL_0]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [3 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK: store i64 %[[VAL_3]], ptr %[[SIZES]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK: store ptr %[[ARG1]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK: store ptr %[[VAL_1]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [3 x i64], ptr %.offload_sizes, i32 0, i32 1
+// CHECK: store i64 %[[VAL_5]], ptr %[[SIZES]], align 8
+
+// CHECK: define void @ref_ptr_ptee_attach_never_(ptr %[[ARG0:.*]], ptr %[[ARG1:.*]])
+// CHECK: %[[VAL_1:.*]] = load ptr, ptr %[[ARG1]], align 8
+// CHECK: %[[VAL_2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[ARG0]], i32 1
+// CHECK: %[[VAL_3:.*]] = ptrtoaddr ptr %[[VAL_2]] to i64
+// CHECK: %[[VAL_4:.*]] = ptrtoaddr ptr %[[ARG0]] to i64
+// CHECK: %[[VAL_5:.*]] = sub i64 %[[VAL_3]], %[[VAL_4]]
+// CHECK: %[[VAL_7:.*]] = icmp eq ptr %[[VAL_1]], null
+// CHECK: %[[VAL_8:.*]] = select i1 %[[VAL_7]], i64 0, i64 4
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK: store ptr %[[ARG0]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK: store i64 %[[VAL_5]], ptr %[[SIZES]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK: store ptr %[[ARG0]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
+// CHECK: store ptr %[[ARG1]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 2
+// CHECK: store ptr %[[VAL_1]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 2
+// CHECK: store i64 %[[VAL_8]], ptr %[[SIZES]], align 8
diff --git a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir
index 6990ea001b6e4..b80e6220e6646 100644
--- a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir
@@ -582,28 +582,24 @@ module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} {
// CHECK: %[[VAL_45:.*]] = getelementptr %[[VAL_18]], ptr %[[VAL_43]], i32 0, i32 0
// CHECK: %[[VAL_46:.*]] = call i64 @__tgt_mapper_num_components(ptr %[[VAL_37]])
// CHECK: %[[VAL_47:.*]] = shl i64 %[[VAL_46]], 48
-// CHECK: %[[VAL_48:.*]] = add nuw i64 3, %[[VAL_47]]
// CHECK: %[[VAL_49:.*]] = and i64 %[[VAL_22]], 3
// CHECK: %[[VAL_50:.*]] = icmp eq i64 %[[VAL_49]], 0
// CHECK: br i1 %[[VAL_50]], label %[[VAL_51:.*]], label %[[VAL_52:.*]]
// CHECK: omp.type.alloc: ; preds = %[[VAL_41]]
-// CHECK: %[[VAL_53:.*]] = and i64 %[[VAL_48]], -4
// CHECK: br label %[[VAL_42]]
// CHECK: omp.type.alloc.else: ; preds = %[[VAL_41]]
// CHECK: %[[VAL_54:.*]] = icmp eq i64 %[[VAL_49]], 1
// CHECK: br i1 %[[VAL_54]], label %[[VAL_55:.*]], label %[[VAL_56:.*]]
// CHECK: omp.type.to: ; preds = %[[VAL_52]]
-// CHECK: %[[VAL_57:.*]] = and i64 %[[VAL_48]], -3
// CHECK: br label %[[VAL_42]]
// CHECK: omp.type.to.else: ; preds = %[[VAL_52]]
// CHECK: %[[VAL_58:.*]] = icmp eq i64 %[[VAL_49]], 2
// CHECK: br i1 %[[VAL_58]], label %[[VAL_59:.*]], label %[[VAL_42]]
// CHECK: omp.type.from: ; preds = %[[VAL_56]]
-// CHECK: %[[VAL_60:.*]] = and i64 %[[VAL_48]], -2
// CHECK: br label %[[VAL_42]]
// CHECK: omp.type.end: ; preds = %[[VAL_59]], %[[VAL_56]], %[[VAL_55]], %[[VAL_51]]
-// CHECK: %[[VAL_61:.*]] = phi i64 [ %[[VAL_53]], %[[VAL_51]] ], [ %[[VAL_57]], %[[VAL_55]] ], [ %[[VAL_60]], %[[VAL_59]] ], [ %[[VAL_48]], %[[VAL_56]] ]
-// CHECK: call void @__tgt_push_mapper_component(ptr %[[VAL_37]], ptr %[[VAL_43]], ptr %[[VAL_45]], i64 4, i64 %[[VAL_61]], ptr @2)
+// CHECK: %[[VAL_61:.*]] = phi i64 [ 0, %[[VAL_51]] ], [ 1, %[[VAL_55]] ], [ 2, %[[VAL_59]] ], [ 3, %[[VAL_56]] ]
+// CHECK: call void @__tgt_push_mapper_component(ptr %[[VAL_37]], ptr %[[VAL_45]], ptr %[[VAL_45]], i64 4, i64 %[[VAL_61]], ptr @2)
// CHECK: %[[VAL_44]] = getelementptr %[[VAL_18]], ptr %[[VAL_43]], i32 1
// CHECK: %[[VAL_62:.*]] = icmp eq ptr %[[VAL_44]], %[[VAL_17]]
// CHECK: br i1 %[[VAL_62]], label %[[VAL_63:.*]], label %[[VAL_41]]
diff --git a/mlir/test/Target/LLVMIR/omptarget-mapper-combined-entry.mlir b/mlir/test/Target/LLVMIR/omptarget-mapper-combined-entry.mlir
index 837cb8a962555..69a76d787f49c 100644
--- a/mlir/test/Target/LLVMIR/omptarget-mapper-combined-entry.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-mapper-combined-entry.mlir
@@ -33,10 +33,12 @@ module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} {
}
// CHECK-LABEL: define void @test_mapper_combined_entries
-// CHECK: %[[MAPPERS:.*offload_mappers.*]] = alloca [5 x ptr]
-// CHECK: %[[MAPPER0:.*]] = getelementptr inbounds [5 x ptr], ptr %[[MAPPERS]], i64 0, i64 0
+// CHECK: %[[MAPPERS:.*offload_mappers.*]] = alloca [4 x ptr]
+// CHECK: %[[MAPPER0:.*]] = getelementptr inbounds [4 x ptr], ptr %[[MAPPERS]], i64 0, i64 0
// CHECK: store ptr @.omp_mapper.mapper, ptr %[[MAPPER0]]
-// CHECK: %[[MAPPER1:.*]] = getelementptr inbounds [5 x ptr], ptr %[[MAPPERS]], i64 0, i64 1
+// CHECK: %[[MAPPER1:.*]] = getelementptr inbounds [4 x ptr], ptr %[[MAPPERS]], i64 0, i64 1
// CHECK: store ptr null, ptr %[[MAPPER1]]
-// CHECK: %[[MAPPER2:.*]] = getelementptr inbounds [5 x ptr], ptr %[[MAPPERS]], i64 0, i64 2
+// CHECK: %[[MAPPER2:.*]] = getelementptr inbounds [4 x ptr], ptr %[[MAPPERS]], i64 0, i64 2
// CHECK: store ptr null, ptr %[[MAPPER2]]
+// CHECK: %[[MAPPER3:.*]] = getelementptr inbounds [4 x ptr], ptr %[[MAPPERS]], i64 0, i64 3
+// CHECK: store ptr null, ptr %[[MAPPER3]]
diff --git a/mlir/test/Target/LLVMIR/omptarget-nested-ptr-record-type-mapping-host.mlir b/mlir/test/Target/LLVMIR/omptarget-nested-ptr-record-type-mapping-host.mlir
index 67946a03c4e25..cc12e1fde4bef 100644
--- a/mlir/test/Target/LLVMIR/omptarget-nested-ptr-record-type-mapping-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-nested-ptr-record-type-mapping-host.mlir
@@ -27,8 +27,8 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
}
}
-// CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [5 x i64] [i64 0, i64 48, i64 8, i64 0, i64 0]
-// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976710659, i64 281474976710659, i64 281474976710675, i64 288]
+// CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [4 x i64] [i64 0, i64 48, i64 0, i64 0]
+// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710659, i64 3, i64 288]
// CHECK: define void @omp_nested_derived_type_alloca_map(ptr %[[ARG:.*]]) {
@@ -42,24 +42,19 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_3:.*]] = ptrtoaddr ptr %[[NESTED_STRUCT_PTR_MEMBER_GEP]] to i64
// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_4:.*]] = sub i64 %[[DTYPE_SIZE_SEGMENT_CALC_2]], %[[DTYPE_SIZE_SEGMENT_CALC_3]]
-// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8
-// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 0
// CHECK: store ptr %[[NESTED_STRUCT_PTR_MEMBER_GEP]], ptr %[[OFFLOAD_PTRS]], align 8
-// CHECK: %[[OFFLOAD_SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK: %[[OFFLOAD_SIZES:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 0
// CHECK: store i64 %[[DTYPE_SIZE_SEGMENT_CALC_4]], ptr %[[OFFLOAD_SIZES]], align 8
-// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8
-// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 1
// CHECK: store ptr %[[NESTED_STRUCT_PTR_MEMBER_GEP]], ptr %[[OFFLOAD_PTRS]], align 8
-// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
+// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8
-// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 2
-// CHECK: store ptr %[[NESTED_STRUCT_PTR_MEMBER_BADDR_GEP]], ptr %[[OFFLOAD_PTRS]], align 8
-
-// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
-// CHECK: store ptr %[[NESTED_STRUCT_PTR_MEMBER_BADDR_GEP]], ptr %[[BASE_PTRS]], align 8
-// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 3
+// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 2
// CHECK: store ptr %[[ARR_OFFSET]], ptr %[[OFFLOAD_PTRS]], align 8
diff --git a/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir b/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir
index 69e03ad421396..88a7b3b021faf 100644
--- a/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir
@@ -41,15 +41,15 @@ llvm.func @_QQmain() {
// CHECK: %[[LAST_MEMBER:.*]] = getelementptr inbounds [10 x i32], ptr %[[MEMBER_ACCESS_3]], i64 0, i64 1
// CHECK: %[[FIRST_MEMBER:.*]] = getelementptr i32, ptr %[[MEMBER_ACCESS_1]], i64 1
// CHECK: %[[FIRST_MEMBER_OFF:.*]] = ptrtoaddr ptr %[[FIRST_MEMBER]] to i64
-// CHECK: %[[SECOND_MEMBER_OFF:.*]] = ptrtoaddr ptr %[[LAST_MEMBER]] to i64
-// CHECK: %[[OFFLOAD_SIZE:.*]] = sub i64 %[[FIRST_MEMBER_OFF]], %[[SECOND_MEMBER_OFF]]
+// CHECK: %[[SECOND_MEMBER_OFF:.*]] = ptrtoaddr ptr %[[MEMBER_ACCESS_3]] to i64
+// CHECK: %[[MEMBER_DIFF:.*]] = sub i64 %[[FIRST_MEMBER_OFF]], %[[SECOND_MEMBER_OFF]]
// CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
// CHECK: store ptr %[[ALLOCA]], ptr %[[BASE_PTR_ARR]], align 8
// CHECK: %[[PTR_ARR:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 0
-// CHECK: store ptr %[[LAST_MEMBER]], ptr %[[PTR_ARR]], align 8
+// CHECK: store ptr %[[MEMBER_ACCESS_3]], ptr %[[PTR_ARR]], align 8
// CHECK: %[[SIZE_ARR:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 0
-// CHECK: store i64 %[[OFFLOAD_SIZE]], ptr %[[SIZE_ARR]], align 8
+// CHECK: store i64 %[[MEMBER_DIFF]], ptr %[[SIZE_ARR]], align 8
// CHECK: %[[BASE_PTR_ARR_2:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
// CHECK: store ptr %[[ALLOCA]], ptr %[[BASE_PTR_ARR_2]], align 8
diff --git a/mlir/test/Target/LLVMIR/omptarget-nowait.mlir b/mlir/test/Target/LLVMIR/omptarget-nowait.mlir
index 8c2cdd7275fd9..f61c03e5a53e9 100644
--- a/mlir/test/Target/LLVMIR/omptarget-nowait.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-nowait.mlir
@@ -25,33 +25,33 @@ module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} {
// CHECK: %struct.[[TSK_WTH_PRVTS:.*]] = type { %struct.kmp_task_ompbuilder_t, %struct.[[PRVTS:.*]] }
// CHECK: %struct.kmp_task_ompbuilder_t = type { ptr, ptr, i32, ptr, ptr }
-// CHECK: %struct.[[PRVTS]] = type { [7 x ptr], [7 x ptr], [7 x i64] }
+// CHECK: %struct.[[PRVTS]] = type { [5 x ptr], [5 x ptr], [5 x i64] }
// CHECK: define void @launch_(ptr captures(none) %0)
// CHECK: %[[STRUCTARG:.*]] = alloca { ptr, ptr }, align 8
-// CHECK: %[[BASEPTRS:.*]] = alloca [7 x ptr], align 8
-// CHECK: %[[PTRS:.*]] = alloca [7 x ptr], align 8
-// CHECK: %[[MAPPERS:.*]] = alloca [7 x ptr], align 8
-// CHECK: %[[SIZES:.*]] = alloca [7 x i64], align 4
+// CHECK: %[[BASEPTRS:.*]] = alloca [5 x ptr], align 8
+// CHECK: %[[PTRS:.*]] = alloca [5 x ptr], align 8
+// CHECK: %[[MAPPERS:.*]] = alloca [5 x ptr], align 8
+// CHECK: %[[SIZES:.*]] = alloca [5 x i64], align 4
-// CHECK: %[[VAL_20:.*]] = getelementptr inbounds [7 x ptr], ptr %[[BASEPTRS]], i32 0, i32 0
-// CHECK: %[[BASEPTRS_GEP:.*]] = getelementptr inbounds [7 x ptr], ptr %[[BASEPTRS]], i32 0, i32 0
-// CHECK: %[[PTRS_GEP:.*]] = getelementptr inbounds [7 x ptr], ptr %[[PTRS]], i32 0, i32 0
-// CHECK: %[[SIZES_GEP:.*]] = getelementptr inbounds [7 x i64], ptr %[[SIZES]], i32 0, i32 0
+// CHECK: %[[VAL_20:.*]] = getelementptr inbounds [5 x ptr], ptr %[[BASEPTRS]], i32 0, i32 0
+// CHECK: %[[BASEPTRS_GEP:.*]] = getelementptr inbounds [5 x ptr], ptr %[[BASEPTRS]], i32 0, i32 0
+// CHECK: %[[PTRS_GEP:.*]] = getelementptr inbounds [5 x ptr], ptr %[[PTRS]], i32 0, i32 0
+// CHECK: %[[SIZES_GEP:.*]] = getelementptr inbounds [5 x i64], ptr %[[SIZES]], i32 0, i32 0
// CHECK: %[[GL_THRD_NUM:.*]] = call i32 @__kmpc_global_thread_num
-// CHECK: %[[TASK_DESC:.*]] = call ptr @__kmpc_omp_target_task_alloc(ptr @4, i32 {{.*}}, i32 0, i64 208, i64 16, ptr [[TGT_TSK_PRXY_FNC:.*]], i64 -1)
+// CHECK: %[[TASK_DESC:.*]] = call ptr @__kmpc_omp_target_task_alloc(ptr @4, i32 {{.*}}, i32 0, i64 160, i64 16, ptr [[TGT_TSK_PRXY_FNC:.*]], i64 -1)
// CHECK: %[[TSK_PTR:.*]] = getelementptr inbounds nuw %struct.[[TSK_WTH_PRVTS]], ptr %[[TASK_DESC]], i32 0, i32 0
// CHECK: %[[SHAREDS:.*]] = getelementptr inbounds nuw %struct.kmp_task_ompbuilder_t, ptr %[[TSK_PTR]], i32 0, i32 0
// CHECK: %[[SHAREDS_PTR:.*]] = load ptr, ptr %[[SHAREDS]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[SHAREDS_PTR]], ptr align 1 %[[STRUCTARG]], i64 16, i1 false)
// CHECK: %[[VAL_50:.*]] = getelementptr inbounds nuw %struct.[[TSK_WTH_PRVTS]], ptr %[[TASK_DESC]], i32 0, i32 1
// CHECK: %[[VAL_51:.*]] = getelementptr inbounds nuw %struct.[[PRVTS]], ptr %[[VAL_50]], i32 0, i32 0
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_51]], ptr align 1 %[[BASEPTRS_GEP]], i64 56, i1 false)
+// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_51]], ptr align 1 %[[BASEPTRS_GEP]], i64 40, i1 false)
// CHECK: %[[VAL_53:.*]] = getelementptr inbounds nuw %struct.[[PRVTS]], ptr %[[VAL_50]], i32 0, i32 1
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_53]], ptr align 1 %[[PTRS_GEP]], i64 56, i1 false)
+// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_53]], ptr align 1 %[[PTRS_GEP]], i64 40, i1 false)
// CHECK: %[[VAL_54:.*]] = getelementptr inbounds nuw %struct.[[PRVTS]], ptr %[[VAL_50]], i32 0, i32 2
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_54]], ptr align 1 %[[SIZES_GEP]], i64 56, i1 false)
+// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_54]], ptr align 1 %[[SIZES_GEP]], i64 40, i1 false)
// CHECK: %[[VAL_55:.*]] = call i32 @__kmpc_omp_task(ptr @4, i32 %[[GL_THRD_NUM]], ptr %[[TASK_DESC]])
// CHECK: define internal void @[[WORKER:.*]](i32 {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}) {
diff --git a/mlir/test/Target/LLVMIR/omptarget-overlapping-record-member-map.mlir b/mlir/test/Target/LLVMIR/omptarget-overlapping-record-member-map.mlir
index ca7e3ef3f5565..b7042a080216c 100644
--- a/mlir/test/Target/LLVMIR/omptarget-overlapping-record-member-map.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-overlapping-record-member-map.mlir
@@ -14,8 +14,8 @@ module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu", omp.is_gpu =
}
}
-// CHECK: @.offload_sizes = private unnamed_addr constant [5 x i64] [i64 0, i64 0, i64 0, i64 4, i64 0]
-// CHECK: @.offload_maptypes = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976710657, i64 281474976710657, i64 281474976710659, i64 288]
+// CHECK: @.offload_sizes = private unnamed_addr constant [4 x i64] [i64 0, i64 8, i64 4, i64 0]
+// CHECK: @.offload_maptypes = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710657, i64 281474976710659, i64 288]
// CHECK: %[[ALLOCA:.*]] = alloca %_QFTdtype, i64 1, align 8
// CHECK: %[[ELEMENT_ACC:.*]] = getelementptr %_QFTdtype, ptr %[[ALLOCA]], i32 0, i32 1
@@ -25,38 +25,19 @@ module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu", omp.is_gpu =
// CHECK: %[[SIZE1_CALC_3:.*]] = ptrtoaddr ptr %[[ALLOCA]] to i64
// CHECK: %[[SIZE1_CALC_4:.*]] = sub i64 %[[SIZE1_CALC_2]], %[[SIZE1_CALC_3]]
-// CHECK: %[[SIZE2_CALC_1:.*]] = getelementptr %_QFTdtype, ptr %[[ALLOCA]], i32 1
-// CHECK: %[[SIZE2_CALC_2:.*]] = ptrtoaddr ptr %[[ELEMENT_ACC]] to i64
-// CHECK: %[[SIZE2_CALC_3:.*]] = ptrtoaddr ptr %[[ALLOCA]] to i64
-// CHECK: %[[SIZE2_CALC_4:.*]] = sub i64 %[[SIZE2_CALC_2]], %[[SIZE2_CALC_3]]
-
-// CHECK: %[[SIZE3_CALC_1:.*]] = getelementptr i32, ptr %[[ELEMENT_ACC]], i32 1
-// CHECK: %[[SIZE3_CALC_2:.*]] = ptrtoaddr ptr %[[SIZE2_CALC_1]] to i64
-// CHECK: %[[SIZE3_CALC_3:.*]] = ptrtoaddr ptr %[[SIZE3_CALC_1]] to i64
-// CHECK: %[[SIZE3_CALC_4:.*]] = sub i64 %[[SIZE3_CALC_2]], %[[SIZE3_CALC_3]]
-
-// CHECK: %[[BASEPTR:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK: %[[BASEPTR:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
// CHECK: store ptr %[[ALLOCA]], ptr %[[BASEPTR]], align 8
-// CHECK: %[[PTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK: %[[PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 0
// CHECK: store ptr %[[ALLOCA]], ptr %[[PTRS]], align 8
-// CHECK: %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 0
// CHECK: store i64 %[[SIZE1_CALC_4]], ptr %[[SIZES]], align 8
-// CHECK: %[[BASEPTR:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK: %[[BASEPTR:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
// CHECK: store ptr %[[ALLOCA]], ptr %[[BASEPTR]], align 8
-// CHECK: %[[PTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK: %[[PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 1
// CHECK: store ptr %[[ALLOCA]], ptr %[[PTRS]], align 8
-// CHECK: %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 1
-// CHECK: store i64 %[[SIZE2_CALC_4]], ptr %[[SIZES]], align 8
-
-// CHECK: %[[BASEPTR:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
-// CHECK: store ptr %[[ALLOCA]], ptr %[[BASEPTR]], align 8
-// CHECK: %[[PTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 2
-// CHECK: store ptr %11, ptr %[[PTRS]], align 8
-// CHECK: %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 2
-// CHECK: store i64 %[[SIZE3_CALC_4]], ptr %[[SIZES]], align 8
-// CHECK: %[[BASEPTR:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
+// CHECK: %[[BASEPTR:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
// CHECK: store ptr %[[ALLOCA]], ptr %[[BASEPTR]], align 8
-// CHECK: %[[PTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 3
+// CHECK: %[[PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 2
// CHECK: store ptr %[[ELEMENT_ACC]], ptr %[[PTRS]], align 8
diff --git a/mlir/test/Target/LLVMIR/omptarget-record-type-mapping-host.mlir b/mlir/test/Target/LLVMIR/omptarget-record-type-mapping-host.mlir
index df16f15a3bad6..1333aebe143a1 100644
--- a/mlir/test/Target/LLVMIR/omptarget-record-type-mapping-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-record-type-mapping-host.mlir
@@ -39,15 +39,15 @@ llvm.func @_QQmain() {
// CHECK: %[[LAST_MEMBER:.*]] = getelementptr inbounds [10 x i32], ptr %[[MEMBER_ACCESS_2]], i64 0, i64 1
// CHECK: %[[FIRST_MEMBER:.*]] = getelementptr i32, ptr %[[MEMBER_ACCESS_1]], i64 1
// CHECK: %[[FIRST_MEMBER_OFF:.*]] = ptrtoaddr ptr %[[FIRST_MEMBER]] to i64
-// CHECK: %[[SECOND_MEMBER_OFF:.*]] = ptrtoaddr ptr %[[LAST_MEMBER]] to i64
-// CHECK: %[[OFFLOAD_SIZE:.*]] = sub i64 %[[FIRST_MEMBER_OFF]], %[[SECOND_MEMBER_OFF]]
+// CHECK: %[[SECOND_MEMBER_OFF:.*]] = ptrtoaddr ptr %[[MEMBER_ACCESS_2]] to i64
+// CHECK: %[[MEMBER_DIFF:.*]] = sub i64 %[[FIRST_MEMBER_OFF]], %[[SECOND_MEMBER_OFF]]
// CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
// CHECK: store ptr %[[ALLOCA]], ptr %[[BASE_PTR_ARR]], align 8
// CHECK: %[[PTR_ARR:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 0
-// CHECK: store ptr %[[LAST_MEMBER]], ptr %[[PTR_ARR]], align 8
+// CHECK: store ptr %[[MEMBER_ACCESS_2]], ptr %[[PTR_ARR]], align 8
// CHECK: %[[SIZE_ARR:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 0
-// CHECK: store i64 %[[OFFLOAD_SIZE]], ptr %[[SIZE_ARR]], align 8
+// CHECK: store i64 %[[MEMBER_DIFF]], ptr %[[SIZE_ARR]], align 8
// CHECK: %[[BASE_PTR_ARR_2:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
// CHECK: store ptr %[[ALLOCA]], ptr %[[BASE_PTR_ARR_2]], align 8
diff --git a/mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir b/mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir
index e6dac7d3d39d2..f5be881c6d79a 100644
--- a/mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir
@@ -59,9 +59,7 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
// CHECK: @[[FULL_ARR_GLOB:.*]] = internal global { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } undef
// CHECK: @[[ARR_SECT_GLOB:.*]] = internal global { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } undef
-// CHECK: @.offload_sizes = private unnamed_addr constant [16 x i64] [i64 48, i64 0, i64 40, i64 8, i64 0, i64 48, i64 0, i64 40, i64 8, i64 0, i64 0, i64 0, i64 0, i64 8, i64 0, i64 0]
-// CHECK: @.offload_maptypes = private unnamed_addr constant [16 x i64] [i64 32, i64 281474976710659, i64 281474976710659, i64 281474976710659, i64 281474976710675, i64 32, i64 1688849860263939, i64 1688849860263939, i64 1688849860263939, i64 1688849860263955, i64 32, i64 3096224743817219, i64 3096224743817219, i64 3096224743817219, i64 3096224743817235, i64 288]
-// CHECK: @.offload_mapnames = private constant [16 x ptr] [ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr null]
+// CHECK: @.offload_maptypes = private unnamed_addr constant [10 x i64] [i64 32, i64 281474976710659, i64 3, i64 32, i64 1125899906842627, i64 3, i64 32, i64 1970324836974595, i64 3, i64 288]
// CHECK: define void @main()
// CHECK: %[[SCALAR_ALLOCA:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, i64 1, align 8
@@ -91,77 +89,48 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
// CHECK: %[[SCALAR_DESC_SZ4:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[SCALAR_ALLOCA]], i32 1
// CHECK: %[[SCALAR_DESC_SZ3:.*]] = ptrtoaddr ptr %[[SCALAR_DESC_SZ4]] to i64
// CHECK: %[[SCALAR_DESC_SZ2:.*]] = ptrtoaddr ptr %[[SCALAR_ALLOCA]] to i64
-// CHECK: %[[SCALAR_DESC_SZ:.*]] = sub i64 %[[SCALAR_DESC_SZ3]], %[[SCALAR_DESC_SZ2]]
-// CHECK: %[[SCALAR_BASE_2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[SCALAR_ALLOCA]], i32 1
-// CHECK: %[[SCALAR_BASE_OFF:.*]] = getelementptr ptr, ptr %[[SCALAR_BASE]], i32 1
-// CHECK: %[[SCALAR_BASE_OFF_SZ1:.*]] = ptrtoaddr ptr %[[SCALAR_BASE_2]] to i64
-// CHECK: %[[SCALAR_BASE_OFF_SZ2:.*]] = ptrtoaddr ptr %[[SCALAR_BASE_OFF]] to i64
-// CHECK: %[[SCALAR_BASE_OFF_SZ3:.*]] = sub i64 %[[SCALAR_BASE_OFF_SZ1]], %[[SCALAR_BASE_OFF_SZ2]]
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [16 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK: %[[SCALAR_DESC_SZ1:.*]] = sub i64 %[[SCALAR_DESC_SZ3]], %[[SCALAR_DESC_SZ2]]
+// CHECK: %[[NULL_CMP3:.*]] = icmp eq ptr %[[SCALAR_PTR_LOAD]], null
+// CHECK: %[[SZ_SEL:.*]] = select i1 %[[NULL_CMP3]], i64 0, i64 4
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
// CHECK: store ptr @full_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [16 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_ptrs, i32 0, i32 0
// CHECK: store ptr @full_arr, ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [16 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
// CHECK: store ptr @full_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [16 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_ptrs, i32 0, i32 1
// CHECK: store ptr @full_arr, ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [16 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
// CHECK: store ptr @full_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [16 x ptr], ptr %.offload_ptrs, i32 0, i32 2
-// CHECK: store ptr getelementptr inbounds nuw (i8, ptr @full_arr, i64 8), ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [16 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
-// CHECK: store ptr @full_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [16 x ptr], ptr %.offload_ptrs, i32 0, i32 3
-// CHECK: store ptr @full_arr, ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [16 x ptr], ptr %.offload_baseptrs, i32 0, i32 4
-// CHECK: store ptr @full_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [16 x ptr], ptr %.offload_ptrs, i32 0, i32 4
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_ptrs, i32 0, i32 2
// CHECK: store ptr %[[FULL_ARR_PTR]], ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [16 x i64], ptr %.offload_sizes, i32 0, i32 4
+// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [10 x i64], ptr %.offload_sizes, i32 0, i32 2
// CHECK: store i64 %[[IS_NULL]], ptr %[[OFFLOADSIZES]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [16 x ptr], ptr %.offload_baseptrs, i32 0, i32 5
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
// CHECK: store ptr @sect_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [16 x ptr], ptr %.offload_ptrs, i32 0, i32 5
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_ptrs, i32 0, i32 3
// CHECK: store ptr @sect_arr, ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [16 x ptr], ptr %.offload_baseptrs, i32 0, i32 6
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 4
// CHECK: store ptr @sect_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [16 x ptr], ptr %.offload_ptrs, i32 0, i32 6
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_ptrs, i32 0, i32 4
// CHECK: store ptr @sect_arr, ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [16 x ptr], ptr %.offload_baseptrs, i32 0, i32 7
-// CHECK: store ptr @sect_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [16 x ptr], ptr %.offload_ptrs, i32 0, i32 7
-// CHECK: store ptr getelementptr inbounds nuw (i8, ptr @sect_arr, i64 8), ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [16 x ptr], ptr %.offload_baseptrs, i32 0, i32 8
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 5
// CHECK: store ptr @sect_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [16 x ptr], ptr %.offload_ptrs, i32 0, i32 8
-// CHECK: store ptr @sect_arr, ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [16 x ptr], ptr %.offload_baseptrs, i32 0, i32 9
-// CHECK: store ptr @sect_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [16 x ptr], ptr %.offload_ptrs, i32 0, i32 9
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_ptrs, i32 0, i32 5
// CHECK: store ptr %[[ARR_SECT_PTR]], ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [16 x i64], ptr %.offload_sizes, i32 0, i32 9
-// CHECK: store i64 %[[IS_NULL2]], ptr %[[OFFLOADSIZES]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [16 x ptr], ptr %.offload_baseptrs, i32 0, i32 10
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 6
// CHECK: store ptr %[[SCALAR_ALLOCA]], ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [16 x ptr], ptr %.offload_ptrs, i32 0, i32 10
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_ptrs, i32 0, i32 6
// CHECK: store ptr %[[SCALAR_ALLOCA]], ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [16 x i64], ptr %.offload_sizes, i32 0, i32 10
-// CHECK: store i64 %[[SCALAR_DESC_SZ]], ptr %[[OFFLOADSIZES]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [16 x ptr], ptr %.offload_baseptrs, i32 0, i32 11
+// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [10 x i64], ptr %.offload_sizes, i32 0, i32 6
+// CHECK: store i64 %[[SCALAR_DESC_SZ1]], ptr %[[OFFLOADSIZES]], align 8
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 7
// CHECK: store ptr %[[SCALAR_ALLOCA]], ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [16 x ptr], ptr %.offload_ptrs, i32 0, i32 11
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_ptrs, i32 0, i32 7
// CHECK: store ptr %[[SCALAR_ALLOCA]], ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [16 x ptr], ptr %.offload_baseptrs, i32 0, i32 12
-// CHECK: store ptr %[[SCALAR_ALLOCA]], ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [16 x ptr], ptr %.offload_ptrs, i32 0, i32 12
-// CHECK: store ptr %[[SCALAR_BASE_OFF]], ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [16 x i64], ptr %.offload_sizes, i32 0, i32 12
-// CHECK: store i64 %[[SCALAR_BASE_OFF_SZ3]], ptr %[[OFFLOADSIZES]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [16 x ptr], ptr %.offload_baseptrs, i32 0, i32 13
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 8
// CHECK: store ptr %[[SCALAR_ALLOCA]], ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [16 x ptr], ptr %.offload_ptrs, i32 0, i32 13
-// CHECK: store ptr %[[SCALAR_BASE]], ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [16 x ptr], ptr %.offload_baseptrs, i32 0, i32 14
-// CHECK: store ptr %[[SCALAR_BASE]], ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [16 x ptr], ptr %.offload_ptrs, i32 0, i32 14
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_ptrs, i32 0, i32 8
// CHECK: store ptr %[[SCALAR_PTR_LOAD]], ptr %[[OFFLOADPTRS]], align 8
+// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [10 x i64], ptr %.offload_sizes, i32 0, i32 8
+// CHECK: store i64 %[[SZ_SEL]], ptr %[[OFFLOADSIZES]], align 8
diff --git a/offload/test/offloading/fortran/descriptor-stack-jam-regression.f90 b/offload/test/offloading/fortran/descriptor-stack-jam-regression.f90
index 45a18b7f38ed3..5e33770648e34 100644
--- a/offload/test/offloading/fortran/descriptor-stack-jam-regression.f90
+++ b/offload/test/offloading/fortran/descriptor-stack-jam-regression.f90
@@ -5,7 +5,8 @@
! device.
! REQUIRES: flang, amdgpu
-! RUN: %libomptarget-compile-fortran-run-and-check-generic
+! RUN: %libomptarget-compile-fortran-generic
+! RUN: env LIBOMPTARGET_TREAT_ATTACH_AUTO_AS_ALWAYS=1 %libomptarget-run-generic 2>&1 | %fcheck-generic
module test
contains
subroutine kernel_1d(array)
diff --git a/offload/test/offloading/fortran/map_attach_always.f90 b/offload/test/offloading/fortran/map_attach_always.f90
new file mode 100644
index 0000000000000..f5c7c5e6df921
--- /dev/null
+++ b/offload/test/offloading/fortran/map_attach_always.f90
@@ -0,0 +1,70 @@
+
+! This checks that attach always forces attachment.
+! NOTE: We have to make sure the old default auto attach behaviour is off to
+! yield the correct results for this test. Otherwise the second target will
+! be treated as if we'd had the attach always specified!
+! REQUIRES: flang, amdgpu
+
+! RUN: %libomptarget-compile-fortran-generic
+! RUN: env LIBOMPTARGET_TREAT_ATTACH_AUTO_AS_ALWAYS=0 %libomptarget-run-generic 2>&1 | %fcheck-generic
+program main
+ implicit none
+ integer, pointer :: map_ptr(:)
+ integer, target :: a(10)
+ integer, target :: b(10)
+ integer :: index, n
+ logical :: correct
+
+ n = 10
+ correct = .true.
+
+ do index = 1, n
+ a(index) = 10
+ b(index) = 20
+ end do
+
+ map_ptr => a
+
+ ! This should map a, b and map_ptr to device and attach map_ptr
+ ! to a (as it is assigned to it above). b is mapped here as well,
+ ! so it is already on device when the target regions below run.
+ !$omp target enter data map(ref_ptr_ptee, to: map_ptr)
+ !$omp target enter data map(to: b, a)
+
+ !$omp target map(to: index) map(tofrom: correct)
+ do index = 1, n
+ if (map_ptr(index) /= 10) then
+ correct = .false.
+ endif
+ end do
+ !$omp end target
+
+ map_ptr => b
+
+ ! No attach(always) here to force re-attachment, so map_ptr
+ ! should still be attached to "a" on device.
+ !$omp target map(to: index) map(tofrom: correct)
+ do index = 1, n
+ if (map_ptr(index) /= 10) then
+ correct = .false.
+ endif
+ end do
+ !$omp end target
+
+ !$omp target map(to: index) map(attach(always): map_ptr) map(tofrom: correct)
+ do index = 1, n
+ if (map_ptr(index) /= 20) then
+ correct = .false.
+ endif
+ end do
+ !$omp end target
+
+ if (correct .NEQV. .true.) then
+ print*, "Failed!"
+ stop 1
+ endif
+
+ print*, "Passed!"
+end program
+
+!CHECK: Passed!
diff --git a/offload/test/offloading/fortran/map_attach_never.f90 b/offload/test/offloading/fortran/map_attach_never.f90
new file mode 100644
index 0000000000000..5b4d2dc5c6cd8
--- /dev/null
+++ b/offload/test/offloading/fortran/map_attach_never.f90
@@ -0,0 +1,55 @@
+! This checks that attach never prevents pointer attachment when specified.
+! NOTE: We have to make sure the old default auto attach behaviour is off to
+! yield the correct results for this test. Otherwise the second target will
+! be treated as if we'd had the attach always specified!
+! REQUIRES: flang, amdgpu
+
+! RUN: %libomptarget-compile-fortran-generic
+! RUN: env LIBOMPTARGET_TREAT_ATTACH_AUTO_AS_ALWAYS=0 %libomptarget-run-generic 2>&1 | %fcheck-generic
+program main
+ implicit none
+ integer, pointer :: map_ptr(:)
+ integer, target :: a(10)
+ integer, target :: b(10)
+ integer :: index, n
+ logical :: correct
+
+ correct = .true.
+ n = 10
+
+ do index = 1, n
+ a(index) = 10
+ b(index) = 20
+ end do
+
+ map_ptr => a
+
+ ! This should map a and map_ptr to device, and attach map_ptr
+ ! to a (as it is assigned to it above).
+ !$omp target enter data map(ref_ptr_ptee, to: map_ptr)
+
+ map_ptr => b
+
+ ! As "b" hasn't been mapped to device yet, the first time it's mapped will
+ ! be when map_ptr is re-mapped (implicitly or explicitly). The default behavior
+ ! when LIBOMPTARGET_TREAT_ATTACH_AUTO_AS_ALWAYS is switched off would then attach
+ ! map_ptr to b, as we've assigned it above. To prevent this and test the never
+ ! attachment, we apply attach(never), which prevents this reattachment from
+ ! occurring.
+ !$omp target map(to: index) map(tofrom: correct) map(attach(never): map_ptr)
+ do index = 1, n
+ if (map_ptr(index) /= 10) then
+ correct = .false.
+ endif
+ end do
+ !$omp end target
+
+ if (correct .NEQV. .true.) then
+ print*, "Failed!"
+ stop 1
+ endif
+
+ print*, "Passed!"
+end program
+
+!CHECK: Passed!
diff --git a/offload/test/offloading/fortran/map_ref_ptr_ptee_test_1.f90 b/offload/test/offloading/fortran/map_ref_ptr_ptee_test_1.f90
new file mode 100644
index 0000000000000..05200768e3fcf
--- /dev/null
+++ b/offload/test/offloading/fortran/map_ref_ptr_ptee_test_1.f90
@@ -0,0 +1,47 @@
+! This checks that we can specify ref_ptee and ref_ptr, not encounter
+! an error and correctly map data to and from device.
+! REQUIRES: flang, amdgpu
+
+! RUN: %libomptarget-compile-fortran-generic -fopenmp-version=61
+! RUN: env LIBOMPTARGET_TREAT_ATTACH_AUTO_AS_ALWAYS=0 %libomptarget-run-generic 2>&1 | %fcheck-generic
+program main
+ implicit none
+ integer, pointer :: map_ptr(:)
+ integer, target :: b(10)
+ integer :: index
+
+ map_ptr => b
+
+ ! Should have auto attach applied and automatically
+ ! attach to ref_ptee. So internally we implicitly apply
+ ! the attach map type.
+ !$omp target enter data map(ref_ptee, to: map_ptr)
+ !$omp target enter data map(ref_ptr, to: map_ptr)
+
+ ! This should, in theory, hit a memory access fault if we haven't
+ ! attached correctly above. If all went well, it should run fine.
+ !$omp target map(to: index)
+ do index = 1, 10
+ map_ptr(index) = index
+ end do
+ !$omp end target
+
+ ! Don't care about the descriptor's contents, but we do
+ ! want to delete it (and only it) separately from the
+ ! data. Here the data is mapped back first and the
+ ! descriptor is deleted afterwards; test_2 does it in
+ ! the opposite order.
+ !$omp target exit data map(ref_ptee, from: map_ptr)
+ !$omp target exit data map(ref_ptr, delete: map_ptr)
+
+ do index = 1, 10
+ if (map_ptr(index) /= index) then
+ print*, "Failed!"
+ stop 1
+ endif
+ end do
+
+ print*, "Passed!"
+end program
+
+! CHECK: Passed!
diff --git a/offload/test/offloading/fortran/map_ref_ptr_ptee_test_2.f90 b/offload/test/offloading/fortran/map_ref_ptr_ptee_test_2.f90
new file mode 100644
index 0000000000000..b5fbfbe8df780
--- /dev/null
+++ b/offload/test/offloading/fortran/map_ref_ptr_ptee_test_2.f90
@@ -0,0 +1,47 @@
+! This checks that we can specify ref_ptee and ref_ptr, not encounter
+! an error and correctly map data to and from device. This does so
+! in a different order from map_ref_ptr_ptee_test_1.f90 to verify we
+! do not hit any odd runtime errors from mapping in a different order.
+! REQUIRES: flang, amdgpu
+
+! RUN: %libomptarget-compile-fortran-generic -fopenmp-version=61
+! RUN: env LIBOMPTARGET_TREAT_ATTACH_AUTO_AS_ALWAYS=0 %libomptarget-run-generic 2>&1 | %fcheck-generic
+
+program main
+ implicit none
+ integer, pointer :: map_ptr(:)
+ integer, target :: b(10)
+ integer :: index
+
+ map_ptr => b
+
+ !$omp target enter data map(ref_ptr, to: map_ptr)
+ !$omp target enter data map(ref_ptee, to: map_ptr)
+
+ ! This should, in theory, hit a memory access fault if we haven't
+ ! attached correctly above. If all went well, it should run fine.
+ !$omp target map(to: index)
+ do index = 1, 10
+ map_ptr(index) = index
+ end do
+ !$omp end target
+
+ ! Don't care about the descriptor, but we do want to
+ ! deallocate it and only it and then map the data
+ ! back. Doing it in a weird-ish order to test we can
+ ! delete the descriptor separately and still pull the
+ ! data back.
+ !$omp target exit data map(ref_ptr, delete: map_ptr)
+ !$omp target exit data map(ref_ptee, from: map_ptr)
+
+ do index = 1, 10
+ if (map_ptr(index) /= index) then
+ print*, "Failed!"
+ stop 1
+ endif
+ end do
+
+ print*, "Passed!"
+end program
+
+! CHECK: Passed!
\ No newline at end of file
diff --git a/offload/test/offloading/fortran/target-map-pointer-to-dtype-allocatable-member.f90 b/offload/test/offloading/fortran/target-map-pointer-to-dtype-allocatable-member.f90
index 8e1e68528943f..da9a8e70967f3 100644
--- a/offload/test/offloading/fortran/target-map-pointer-to-dtype-allocatable-member.f90
+++ b/offload/test/offloading/fortran/target-map-pointer-to-dtype-allocatable-member.f90
@@ -4,7 +4,8 @@
! directives
! REQUIRES: flang, amdgpu
-! RUN: %libomptarget-compile-fortran-run-and-check-generic
+! RUN: %libomptarget-compile-fortran-generic
+! RUN: env LIBOMPTARGET_TREAT_ATTACH_AUTO_AS_ALWAYS=1 %libomptarget-run-generic 2>&1 | %fcheck-generic
module dtype
type :: my_dtype
integer :: s, e
More information about the llvm-branch-commits
mailing list