[llvm-branch-commits] [flang] [llvm] [mlir] [OpenMP][MLIR] Modify OpenMP Dialect lowering to support attach mapping (PR #179023)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sat Jan 31 01:03:51 PST 2026
https://github.com/agozillon created https://github.com/llvm/llvm-project/pull/179023
This PR adjusts the LLVM-IR lowering to support the new attach map type that the runtime uses to link data and pointers together. In most cases this replaces the older OMP_MAP_PTR_AND_OBJ map type, and it allows slightly more complicated ref_ptr/ptee and attach semantics.
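For orientation before the diff: the gist of the encoding change is that a mapped pointer member no longer gets a single combined entry carrying OMP_MAP_PTR_AND_OBJ; instead the data entry keeps only its copy semantics and a separate trailing entry carries the attach request. A rough sketch of the two encodings (not code from this patch; it assumes LLVM's bitmask-enum operators from OMPConstants.h and the OMP_MAP_ATTACH enumerator this patch stack introduces):

  using llvm::omp::OpenMPOffloadMappingFlags;
  // Old scheme: one entry combining copy semantics with the
  // pointer/pointee link.
  OpenMPOffloadMappingFlags oldEntry =
      OpenMPOffloadMappingFlags::OMP_MAP_TO |
      OpenMPOffloadMappingFlags::OMP_MAP_FROM |
      OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
  // New scheme: the data entry keeps only the copy semantics...
  OpenMPOffloadMappingFlags dataEntry =
      OpenMPOffloadMappingFlags::OMP_MAP_TO |
      OpenMPOffloadMappingFlags::OMP_MAP_FROM;
  // ...and a trailing entry carries only the attach request, which the
  // runtime uses to bind the pointer to its pointee on the device.
  OpenMPOffloadMappingFlags attachEntry =
      OpenMPOffloadMappingFlags::OMP_MAP_ATTACH;

This is visible in the map-types-and-sizes.f90 CHECK updates below, where combined words such as 281474976710675 are replaced by a plain data entry plus a 16384 attach entry.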
From 7851c81db2d5cfff2caa4d31c23891a8c6ae76d3 Mon Sep 17 00:00:00 2001
From: agozillon <Andrew.Gozillon at amd.com>
Date: Mon, 26 Jan 2026 11:15:19 -0600
Subject: [PATCH] [OpenMP][MLIR] Modify OpenMP Dialect lowering to
support attach mapping
This PR adjusts the LLVM-IR lowering to support the new attach map type that the runtime
uses to link data and pointers together. In most cases this replaces the older
OMP_MAP_PTR_AND_OBJ map type, and it allows slightly more complicated ref_ptr/ptee
and attach semantics.
---
.../OpenMP/map-types-and-sizes.f90 | 211 ++++---
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 516 ++++++++++--------
.../allocatable_gpu_reduction_teams.mlir | 3 +-
.../omptarget-data-use-dev-ordering.mlir | 15 +-
.../LLVMIR/omptarget-host-ref-semantics.mlir | 364 ++++++++++++
...t-nested-ptr-record-type-mapping-host.mlir | 23 +-
...arget-nested-record-type-mapping-host.mlir | 4 +-
mlir/test/Target/LLVMIR/omptarget-nowait.mlir | 26 +-
.../omptarget-record-type-mapping-host.mlir | 4 +-
...rget-record-type-with-ptr-member-host.mlir | 80 ++-
.../descriptor-stack-jam-regression.f90 | 3 +-
.../offloading/fortran/map_attach_always.f90 | 70 +++
.../offloading/fortran/map_attach_never.f90 | 55 ++
.../fortran/map_ref_ptr_ptee_test_1.f90 | 48 ++
.../fortran/map_ref_ptr_ptee_test_2.f90 | 47 ++
...ap-pointer-to-dtype-allocatable-member.f90 | 3 +-
16 files changed, 1047 insertions(+), 425 deletions(-)
create mode 100644 mlir/test/Target/LLVMIR/omptarget-host-ref-semantics.mlir
create mode 100644 offload/test/offloading/fortran/map_attach_always.f90
create mode 100644 offload/test/offloading/fortran/map_attach_never.f90
create mode 100644 offload/test/offloading/fortran/map_ref_ptr_ptee_test_1.f90
create mode 100644 offload/test/offloading/fortran/map_ref_ptr_ptee_test_2.f90
diff --git a/flang/test/Integration/OpenMP/map-types-and-sizes.f90 b/flang/test/Integration/OpenMP/map-types-and-sizes.f90
index d6d93985d9895..98744da793fa0 100644
--- a/flang/test/Integration/OpenMP/map-types-and-sizes.f90
+++ b/flang/test/Integration/OpenMP/map-types-and-sizes.f90
@@ -42,8 +42,7 @@ subroutine mapType_is_device_ptr
!$omp end target
end subroutine mapType_is_device_ptr
-!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [5 x i64] [i64 0, i64 0, i64 0, i64 8, i64 0]
-!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976711173, i64 281474976711173, i64 281474976711171, i64 281474976711187]
+!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976711173, i64 281474976711173, i64 515, i64 16384]
subroutine mapType_ptr
integer, pointer :: a
!$omp target
@@ -82,8 +81,7 @@ subroutine map_ompx_hold
!$omp end target data
end subroutine
-!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [5 x i64] [i64 0, i64 0, i64 0, i64 8, i64 0]
-!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976711173, i64 281474976711173, i64 281474976711171, i64 281474976711187]
+!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976711173, i64 281474976711173, i64 515, i64 16384]
subroutine mapType_allocatable
integer, allocatable :: a
allocate(a)
@@ -93,8 +91,7 @@ subroutine mapType_allocatable
deallocate(a)
end subroutine mapType_allocatable
-!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [5 x i64] [i64 0, i64 0, i64 0, i64 8, i64 0]
-!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976710661, i64 281474976710661, i64 281474976710659, i64 281474976710675]
+!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976710661, i64 281474976710661, i64 3, i64 16384]
subroutine mapType_ptr_explicit
integer, pointer :: a
!$omp target map(tofrom: a)
@@ -102,8 +99,7 @@ subroutine mapType_ptr_explicit
!$omp end target
end subroutine mapType_ptr_explicit
-!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [5 x i64] [i64 0, i64 0, i64 0, i64 8, i64 0]
-!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976710661, i64 281474976710661, i64 281474976710659, i64 281474976710675]
+!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976710661, i64 281474976710661, i64 3, i64 16384]
subroutine mapType_allocatable_explicit
integer, allocatable :: a
allocate(a)
@@ -254,8 +250,8 @@ subroutine mapType_derived_explicit_nested_member_with_bounds
!$omp end target
end subroutine mapType_derived_explicit_nested_member_with_bounds
-!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [4 x i64] [i64 0, i64 48, i64 8, i64 0]
-!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710661, i64 281474976710659, i64 281474976710675]
+!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [4 x i64] [i64 0, i64 48, i64 0, i64 0]
+!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710661, i64 3, i64 16384]
subroutine mapType_derived_type_alloca()
type :: one_layer
real(4) :: i
@@ -275,8 +271,8 @@ subroutine mapType_derived_type_alloca()
!$omp end target
end subroutine
-!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [9 x i64] [i64 0, i64 0, i64 0, i64 8, i64 0, i64 48, i64 8, i64 0, i64 4]
-!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [9 x i64] [i64 32, i64 281474976710661, i64 281474976710661, i64 281474976710659, i64 281474976710675, i64 281474976710661, i64 281474976710659, i64 281474976710675, i64 281474976710659]
+!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [9 x i64] [i64 0, i64 0, i64 0, i64 0, i64 48, i64 0, i64 4, i64 0, i64 0]
+!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [9 x i64] [i64 32, i64 281474976710661, i64 281474976710661, i64 0, i64 281474976710661, i64 3, i64 281474976710659, i64 16384, i64 16384]
subroutine mapType_alloca_derived_type()
type :: one_layer
real(4) :: i
@@ -298,8 +294,8 @@ subroutine mapType_alloca_derived_type()
!$omp end target
end subroutine
-!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [9 x i64] [i64 0, i64 0, i64 0, i64 8, i64 0, i64 48, i64 8, i64 0, i64 4]
-!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [9 x i64] [i64 32, i64 281474976710661, i64 281474976710661, i64 281474976710659, i64 281474976710675, i64 281474976710661, i64 281474976710659, i64 281474976710675, i64 281474976710659]
+!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [9 x i64] [i64 0, i64 0, i64 0, i64 0, i64 48, i64 0, i64 4, i64 0, i64 0]
+!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [9 x i64] [i64 32, i64 281474976710661, i64 281474976710661, i64 0, i64 281474976710661, i64 3, i64 281474976710659, i64 16384, i64 16384]
subroutine mapType_alloca_nested_derived_type()
type :: middle_layer
real(4) :: i
@@ -329,8 +325,8 @@ subroutine mapType_alloca_nested_derived_type()
!$omp end target
end subroutine
-!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [4 x i64] [i64 0, i64 48, i64 8, i64 0]
-!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710661, i64 281474976710659, i64 281474976710675]
+!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [4 x i64] [i64 0, i64 48, i64 0, i64 0]
+!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710661, i64 3, i64 16384]
subroutine mapType_nested_derived_type_alloca()
type :: middle_layer
real(4) :: i
@@ -358,8 +354,8 @@ subroutine mapType_nested_derived_type_alloca()
!$omp end target
end subroutine
-!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [7 x i64] [i64 0, i64 64, i64 8, i64 0, i64 48, i64 8, i64 0]
-!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [7 x i64] [i64 32, i64 281474976710661, i64 281474976710656, i64 281474976710672, i64 281474976710661, i64 281474976710659, i64 281474976710675]
+!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [7 x i64] [i64 0, i64 64, i64 0, i64 48, i64 0, i64 0, i64 0]
+!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [7 x i64] [i64 32, i64 281474976710661, i64 0, i64 281474976710661, i64 3, i64 16384, i64 16384]
subroutine mapType_nested_derived_type_member_idx()
type :: vertexes
integer :: test
@@ -564,13 +560,14 @@ end subroutine mapType_common_block_members
!CHECK: %[[DESC_BASE_ADDR_DATA_SIZE:.*]] = mul i64 %[[MEMBER_BASE_ADDR_SIZE]], 4
!CHECK: %[[LOAD_ADDR_DATA:.*]] = load ptr, ptr %[[MEMBER_DESCRIPTOR_BASE_ADDR]], align 8
!CHECK: %[[GEP_ADDR_DATA:.*]] = getelementptr inbounds i32, ptr %[[LOAD_ADDR_DATA]], i64 0
+!CHECK: %[[LOAD_ADDR_DATA2:.*]] = load ptr, ptr %[[MEMBER_DESCRIPTOR_BASE_ADDR]], align 8
+!CHECK: %[[GEP_ADDR_DATA2:.*]] = getelementptr inbounds i32, ptr %[[LOAD_ADDR_DATA2]], i64 0
!CHECK: %[[MEMBER_ACCESS_ADDR_END:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[MEMBER_ACCESS]], i64 1
!CHECK: %[[MEMBER_ACCESS_ADDR_INT:.*]] = ptrtoint ptr %[[MEMBER_ACCESS_ADDR_END]] to i64
!CHECK: %[[MEMBER_ACCESS_ADDR_BEGIN:.*]] = ptrtoint ptr %[[MEMBER_ACCESS]] to i64
!CHECK: %[[DTYPE_SEGMENT_SIZE:.*]] = sub i64 %[[MEMBER_ACCESS_ADDR_INT]], %[[MEMBER_ACCESS_ADDR_BEGIN]]
!CHECK: %[[DTYPE_SIZE_CALC:.*]] = sdiv exact i64 %[[DTYPE_SEGMENT_SIZE]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
!CHECK: %[[DTYPE_CMP:.*]] = icmp eq ptr %[[GEP_ADDR_DATA]], null
-!CHECK: %[[DTYPE_SEL:.*]] = select i1 %[[DTYPE_CMP]], i64 0, i64 %[[DESC_BASE_ADDR_DATA_SIZE]]
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
!CHECK: store ptr %[[ALLOCA]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 0
@@ -584,14 +581,11 @@ end subroutine mapType_common_block_members
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
!CHECK: store ptr %[[ALLOCA]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 2
-!CHECK: store ptr %[[MEMBER_DESCRIPTOR_BASE_ADDR]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: store ptr %[[GEP_ADDR_DATA2]], ptr %[[OFFLOAD_PTR_ARR]], align 8
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
-!CHECK: store ptr %[[MEMBER_DESCRIPTOR_BASE_ADDR]], ptr %[[BASE_PTR_ARR]], align 8
+!CHECK: store ptr %[[MEMBER_ACCESS]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 3
!CHECK: store ptr %array_offset, ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 3
-!CHECK: store i64 %[[DTYPE_SEL]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
-
!CHECK-LABEL: define {{.*}} @{{.*}}maptype_alloca_derived_type_{{.*}}
!CHECK: %{{.*}} = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, align 8
@@ -612,7 +606,6 @@ end subroutine mapType_common_block_members
!CHECK: %[[DTYPE_BASE_ADDR_LOAD:.*]] = load ptr, ptr %[[DTYPE_BASE_ADDR_ACCESS]], align 8
!CHECK: %[[DTYPE_ALLOCA_MEMBER_ACCESS:.*]] = getelementptr %_QFmaptype_alloca_derived_typeTone_layer, ptr %[[DTYPE_BASE_ADDR_LOAD]], i32 0, i32 4
!CHECK: %[[DTYPE_ALLOCA_MEMBER_BASE_ADDR_ACCESS:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[DTYPE_ALLOCA_MEMBER_ACCESS]], i32 0, i32 0
-
!CHECK: %[[DTYPE_BASE_ADDR_ACCESS_2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[DTYPE_DESC_ALLOCA]], i32 0, i32 0
!CHECK: %[[DTYPE_BASE_ADDR_LOAD_2:.*]] = load ptr, ptr %[[DTYPE_BASE_ADDR_ACCESS_2]], align 8
!CHECK: %[[DTYPE_NONALLOCA_MEMBER_ACCESS:.*]] = getelementptr %_QFmaptype_alloca_derived_typeTone_layer, ptr %[[DTYPE_BASE_ADDR_LOAD_2]], i32 0, i32 5
@@ -622,63 +615,57 @@ end subroutine mapType_common_block_members
!CHECK: %[[MEMBER_SIZE_CALC_3:.*]] = mul i64 1, %[[MEMBER_SIZE_CALC_2]]
!CHECK: %[[MEMBER_SIZE_CALC_4:.*]] = mul i64 %[[MEMBER_SIZE_CALC_3]], 4
!CHECK: %[[DTYPE_BASE_ADDR_LOAD_3:.*]] = load ptr, ptr %[[DTYPE_BASE_ADDR_ACCESS_3]], align 8
+!CHECK: %[[DTYPE_BASE_ADDR_LOAD_3_1:.*]] = load ptr, ptr %[[DTYPE_BASE_ADDR_ACCESS_3]], align 8
!CHECK: %[[LOAD_DTYPE_DESC_MEMBER:.*]] = load ptr, ptr %[[DTYPE_ALLOCA_MEMBER_BASE_ADDR_ACCESS]], align 8
!CHECK: %[[MEMBER_ARRAY_OFFSET:.*]] = getelementptr inbounds i32, ptr %[[LOAD_DTYPE_DESC_MEMBER]], i64 0
-!CHECK: %[[DTYPE_END_OFFSET:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[DTYPE_DESC_ALLOCA_3]], i32 1
-!CHECK: %[[DTYPE_END:.*]] = ptrtoint ptr %[[DTYPE_END_OFFSET]] to i64
-!CHECK: %[[DTYPE_BEGIN:.*]] = ptrtoint ptr %[[DTYPE_DESC_ALLOCA_3]] to i64
-!CHECK: %[[DTYPE_DESC_SZ_CALC:.*]] = sub i64 %[[DTYPE_END]], %[[DTYPE_BEGIN]]
-!CHECK: %[[DTYPE_DESC_SZ:.*]] = sdiv exact i64 %[[DTYPE_DESC_SZ_CALC]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-!CHECK: %[[DTYPE_BASE_ADDR_ACCESS_4:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[DTYPE_DESC_ALLOCA_3]], i32 1
-!CHECK: %[[DTYPE_BASE_ADDR_ACCESS_3_OFF:.*]] = getelementptr ptr, ptr %[[DTYPE_BASE_ADDR_ACCESS_3]], i32 1
-!CHECK: %[[SIZE_2_CALC_1:.*]] = ptrtoint ptr %[[DTYPE_BASE_ADDR_ACCESS_4]] to i64
-!CHECK: %[[SIZE_2_CALC_2:.*]] = ptrtoint ptr %[[DTYPE_BASE_ADDR_ACCESS_3_OFF]] to i64
-!CHECK: %[[SIZE_2_CALC_3:.*]] = sub i64 %[[SIZE_2_CALC_1]], %[[SIZE_2_CALC_2]]
-!CHECK: %[[SIZE_2_CALC_4:.*]] = sdiv exact i64 %[[SIZE_2_CALC_3]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-!CHECK: %[[CMP_NULL:.*]] = icmp eq ptr %[[MEMBER_ARRAY_OFFSET]], null
-!CHECK: %[[NULL_SEL:.*]] = select i1 %[[CMP_NULL]], i64 0, i64 %[[MEMBER_SIZE_CALC_4]]
-
+!CHECK: %[[SIZE_CALC_1:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[DTYPE_DESC_ALLOCA_3]], i32 1
+!CHECK: %[[SIZE_CALC_2:.*]] = ptrtoint ptr %[[SIZE_CALC_1]] to i64
+!CHECK: %[[SIZE_CALC_3:.*]] = ptrtoint ptr %[[DTYPE_DESC_ALLOCA_3]] to i64
+!CHECK: %[[SIZE_CALC_4:.*]] = sub i64 %[[SIZE_CALC_2]], %[[SIZE_CALC_3]]
+!CHECK: %[[SIZE_CALC_5:.*]] = sdiv exact i64 %[[SIZE_CALC_4]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+!CHECK: %[[SIZE_CALC_6:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[DTYPE_DESC_ALLOCA_3]], i32 1
+!CHECK: %[[SIZE_CALC_7:.*]] = ptrtoint ptr %[[DTYPE_BASE_ADDR_ACCESS_3]] to i64
+!CHECK: %[[SIZE_CALC_8:.*]] = ptrtoint ptr %[[DTYPE_DESC_ALLOCA_3]] to i64
+!CHECK: %[[SIZE_CALC_9:.*]] = sub i64 %[[SIZE_CALC_7]], %[[SIZE_CALC_8]]
+!CHECK: %[[CALC_SIZE_1:.*]] = sdiv exact i64 %[[SIZE_CALC_9]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+!CHECK: %[[OFFLOAD_PTR:.*]] = getelementptr ptr, ptr %[[DTYPE_BASE_ADDR_ACCESS_3]], i32 1
+!CHECK: %[[SIZE_CALC_10:.*]] = ptrtoint ptr %[[SIZE_CALC_6]] to i64
+!CHECK: %[[SIZE_CALC_11:.*]] = ptrtoint ptr %[[OFFLOAD_PTR]] to i64
+!CHECK: %[[SIZE_CALC_12:.*]] = sub i64 %[[SIZE_CALC_10]], %[[SIZE_CALC_11]]
+!CHECK: %[[SIZE_CALC_13:.*]] = sdiv exact i64 %[[SIZE_CALC_12]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+!CHECK: %[[NULL_CMP:.*]] = icmp eq ptr %[[DTYPE_BASE_ADDR_LOAD_3_1]], null
+!CHECK: %[[SEL_SZ:.*]] = select i1 %[[NULL_CMP]], i64 0, i64 136
+!CHECK: %[[NULL_CMP:.*]] = icmp eq ptr %array_offset1, null
+!CHECK: %[[SEL_SZ2:.*]] = select i1 %[[NULL_CMP]], i64 0, i64 %[[MEMBER_SIZE_CALC_4]]
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 0
!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [9 x i64], ptr %.offload_sizes, i32 0, i32 0
-!CHECK: store i64 %[[DTYPE_DESC_SZ]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 1
!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [9 x i64], ptr %.offload_sizes, i32 0, i32 1
+!CHECK: store i64 %[[CALC_SIZE_1]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 2
-!CHECK: store ptr %[[DTYPE_BASE_ADDR_ACCESS_3_OFF]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [9 x i64], ptr %.offload_sizes, i32 0, i32 2
-!CHECK: store i64 %[[SIZE_2_CALC_4]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
+!CHECK: store ptr %[[OFFLOAD_PTR]], ptr %[[OFFLOAD_PTR_ARR]], align 8
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 3
-!CHECK: store ptr %[[DTYPE_BASE_ADDR_ACCESS_3]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: store ptr %[[DTYPE_BASE_ADDR_LOAD_3_1]], ptr %[[OFFLOAD_PTR_ARR]], align 8
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 4
-!CHECK: store ptr %[[DTYPE_BASE_ADDR_ACCESS_3]], ptr %[[BASE_PTR_ARR]], align 8
+!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 4
-!CHECK: store ptr %[[DTYPE_BASE_ADDR_LOAD_3]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: store ptr %[[DTYPE_ALLOCA_MEMBER_ACCESS]], ptr %[[OFFLOAD_PTR_ARR]], align 8
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 5
!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 5
-!CHECK: store ptr %[[DTYPE_ALLOCA_MEMBER_ACCESS]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: store ptr %[[MEMBER_ARRAY_OFFSET]], ptr %[[OFFLOAD_PTR_ARR]], align 8
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 6
!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 6
-!CHECK: store ptr %[[DTYPE_ALLOCA_MEMBER_BASE_ADDR_ACCESS]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 7
-!CHECK: store ptr %[[DTYPE_ALLOCA_MEMBER_BASE_ADDR_ACCESS]], ptr %[[BASE_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 7
-!CHECK: store ptr %[[MEMBER_ARRAY_OFFSET]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [9 x i64], ptr %.offload_sizes, i32 0, i32 7
-!CHECK: store i64 %[[NULL_SEL]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
-!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 8
-!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 8
!CHECK: store ptr %[[DTYPE_NONALLOCA_MEMBER_ACCESS]], ptr %[[OFFLOAD_PTR_ARR]], align 8
!CHECK-LABEL: define {{.*}} @{{.*}}maptype_alloca_nested_derived_type{{.*}}
@@ -705,62 +692,57 @@ end subroutine mapType_common_block_members
!CHECK: %[[ALLOCATABLE_MEMBER_SIZE_CALC_4:.*]] = mul i64 1, %[[ALLOCATABLE_MEMBER_SIZE_CALC_3]]
!CHECK: %[[ALLOCATABLE_MEMBER_SIZE_CALC_5:.*]] = mul i64 %[[ALLOCATABLE_MEMBER_SIZE_CALC_4]], 4
!CHECK: %[[LOAD_BASE_ADDR:.*]] = load ptr, ptr %[[DTYPE_DESC_BASE_ADDR]], align 8
+!CHECK: %[[LOAD_BASE_ADDR2:.*]] = load ptr, ptr %[[DTYPE_DESC_BASE_ADDR]], align 8
!CHECK: %[[LOAD_DESC_MEMBER_BASE_ADDR:.*]] = load ptr, ptr %[[MAPPED_MEMBER_BASE_ADDR_ACCESS]], align 8
!CHECK: %[[ARRAY_OFFSET:.*]] = getelementptr inbounds i32, ptr %[[LOAD_DESC_MEMBER_BASE_ADDR]], i64 0
-!CHECK: %[[DTYPE_DESC_SIZE_CALC_1:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[DTYPE_DESC_ALLOCA_3]], i32 1
-!CHECK: %[[DTYPE_DESC_SIZE_CALC_2:.*]] = ptrtoint ptr %[[DTYPE_DESC_SIZE_CALC_1]] to i64
-!CHECK: %[[DTYPE_DESC_SIZE_CALC_3:.*]] = ptrtoint ptr %[[DTYPE_DESC_ALLOCA_3]] to i64
-!CHECK: %[[DTYPE_DESC_SIZE_CALC_4:.*]] = sub i64 %[[DTYPE_DESC_SIZE_CALC_2]], %[[DTYPE_DESC_SIZE_CALC_3]]
-!CHECK: %[[DTYPE_DESC_SIZE_CALC_5:.*]] = sdiv exact i64 %[[DTYPE_DESC_SIZE_CALC_4]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-!CHECK: %[[DTYPE_BASE_ADDR_ACCESS_3:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[DTYPE_DESC_ALLOCA_3]], i32 1
-!CHECK: %[[DTYPE_BASE_ADDR_ACCESS_3_OFF:.*]] = getelementptr ptr, ptr %[[DTYPE_DESC_BASE_ADDR]], i32 1
-!CHECK: %[[SIZE_2_CALC_1:.*]] = ptrtoint ptr %[[DTYPE_BASE_ADDR_ACCESS_3]] to i64
-!CHECK: %[[SIZE_2_CALC_2:.*]] = ptrtoint ptr %[[DTYPE_BASE_ADDR_ACCESS_3_OFF]] to i64
-!CHECK: %[[SIZE_2_CALC_3:.*]] = sub i64 %[[SIZE_2_CALC_1]], %[[SIZE_2_CALC_2]]
-!CHECK: %[[SIZE_2_CALC_4:.*]] = sdiv exact i64 %[[SIZE_2_CALC_3]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+!CHECK: %[[SIZE_CALC_1:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[DTYPE_DESC_ALLOCA_3]], i32 1
+!CHECK: %[[SIZE_CALC_2:.*]] = ptrtoint ptr %[[SIZE_CALC_1]] to i64
+!CHECK: %[[SIZE_CALC_3:.*]] = ptrtoint ptr %[[DTYPE_DESC_ALLOCA_3]] to i64
+!CHECK: %[[SIZE_CALC_4:.*]] = sub i64 %[[SIZE_CALC_2]], %[[SIZE_CALC_3]]
+!CHECK: %[[SIZE_CALC_5:.*]] = sdiv exact i64 %[[SIZE_CALC_4]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+!CHECK: %[[SIZE_CALC_6:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[DTYPE_DESC_ALLOCA_3]], i32 1
+!CHECK: %[[SIZE_CALC_7:.*]] = ptrtoint ptr %36 to i64
+!CHECK: %[[SIZE_CALC_8:.*]] = ptrtoint ptr %[[DTYPE_DESC_ALLOCA_3]] to i64
+!CHECK: %[[SIZE_CALC_9:.*]] = sub i64 %[[SIZE_CALC_7]], %[[SIZE_CALC_8]]
+!CHECK: %[[CALC_SIZE:.*]] = sdiv exact i64 %[[SIZE_CALC_9]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+!CHECK: %[[OFFLOAD_PTR:.*]] = getelementptr ptr, ptr %36, i32 1
+!CHECK: %[[SIZE_CALC_10:.*]] = ptrtoint ptr %[[SIZE_CALC_6]] to i64
+!CHECK: %[[SIZE_CALC_11:.*]] = ptrtoint ptr %[[OFFLOAD_PTR]] to i64
+!CHECK: %[[SIZE_CALC_12:.*]] = sub i64 %[[SIZE_CALC_10]], %[[SIZE_CALC_11]]
+!CHECK: %[[SIZE_CALC_13:.*]] = sdiv exact i64 %[[SIZE_CALC_12]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+!CHECK: %[[NULL_CMP:.*]] = icmp eq ptr %[[LOAD_BASE_ADDR2]], null
+!CHECK: %[[SEL_SZ:.*]] = select i1 %[[NULL_CMP]], i64 0, i64 240
!CHECK: %[[NULL_CMP:.*]] = icmp eq ptr %[[ARRAY_OFFSET]], null
-!CHECK: %[[NULL_SEL:.*]] = select i1 %[[NULL_CMP]], i64 0, i64 %[[ALLOCATABLE_MEMBER_SIZE_CALC_5]]
+!CHECK: %[[SEL_SZ2:.*]] = select i1 %[[NULL_CMP]], i64 0, i64 %[[ALLOCATABLE_MEMBER_SIZE_CALC_5]]
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 0
!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [9 x i64], ptr %.offload_sizes, i32 0, i32 0
-!CHECK: store i64 %[[DTYPE_DESC_SIZE_CALC_5]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 1
!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [9 x i64], ptr %.offload_sizes, i32 0, i32 1
+!CHECK: store i64 %[[CALC_SIZE]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 2
-!CHECK: store ptr %[[DTYPE_BASE_ADDR_ACCESS_3_OFF]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [9 x i64], ptr %.offload_sizes, i32 0, i32 2
-!CHECK: store i64 %[[SIZE_2_CALC_4]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
+!CHECK: store ptr %[[OFFLOAD_PTR]], ptr %[[OFFLOAD_PTR_ARR]], align 8
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 3
-!CHECK: store ptr %[[DTYPE_DESC_BASE_ADDR]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: store ptr %[[LOAD_BASE_ADDR2]], ptr %[[OFFLOAD_PTR_ARR]], align 8
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 4
-!CHECK: store ptr %[[DTYPE_DESC_BASE_ADDR]], ptr %[[BASE_PTR_ARR]], align 8
+!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 4
-!CHECK: store ptr %[[LOAD_BASE_ADDR]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: store ptr %[[MAPPED_MEMBER_ACCESS]], ptr %[[OFFLOAD_PTR_ARR]], align 8
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 5
-!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
+!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 5
-!CHECK: store ptr %[[MAPPED_MEMBER_ACCESS]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: store ptr %[[ARRAY_OFFSET]], ptr %[[OFFLOAD_PTR_ARR]], align 8
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 6
!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 6
-!CHECK: store ptr %[[MAPPED_MEMBER_BASE_ADDR_ACCESS]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 7
-!CHECK: store ptr %[[MAPPED_MEMBER_BASE_ADDR_ACCESS]], ptr %[[BASE_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 7
-!CHECK: store ptr %[[ARRAY_OFFSET]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [9 x i64], ptr %.offload_sizes, i32 0, i32 7
-!CHECK: store i64 %[[NULL_SEL]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
-!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 8
-!CHECK: store ptr %[[DTYPE_DESC_ALLOCA_3]], ptr %[[BASE_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 8
!CHECK: store ptr %[[NESTED_NONALLOCA_MEMBER_ACCESS]], ptr %[[OFFLOAD_PTR_ARR]], align 8
!CHECK-LABEL: define {{.*}} @{{.*}}maptype_nested_derived_type_alloca{{.*}}
@@ -778,12 +760,14 @@ end subroutine mapType_common_block_members
!CHECK: %[[ALLOCATABLE_MEMBER_SIZE_CALC_5:.*]] = mul i64 %[[ALLOCATABLE_MEMBER_SIZE_CALC_4]], 4
!CHECK: %[[LOAD_BASE_ADDR:.*]] = load ptr, ptr %[[NESTED_MEMBER_BASE_ADDR_ACCESS]], align 8
!CHECK: %[[ARR_OFFS:.*]] = getelementptr inbounds i32, ptr %[[LOAD_BASE_ADDR]], i64 0
+!CHECK: %[[LOAD_BASE_ADDR:.*]] = load ptr, ptr %[[NESTED_MEMBER_BASE_ADDR_ACCESS]], align 8
+!CHECK: %[[ARR_OFFS2:.*]] = getelementptr inbounds i32, ptr %[[LOAD_BASE_ADDR]], i64 0
!CHECK: %[[NESTED_MEMBER_BASE_ADDR_ACCESS_2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[NESTED_MEMBER_ACCESS]], i64 1
!CHECK: %[[DTYPE_SEGMENT_SIZE_CALC_1:.*]] = ptrtoint ptr %[[NESTED_MEMBER_BASE_ADDR_ACCESS_2]] to i64
!CHECK: %[[DTYPE_SEGMENT_SIZE_CALC_2:.*]] = ptrtoint ptr %[[NESTED_MEMBER_ACCESS]] to i64
!CHECK: %[[DTYPE_SEGMENT_SIZE_CALC_3:.*]] = sub i64 %[[DTYPE_SEGMENT_SIZE_CALC_1]], %[[DTYPE_SEGMENT_SIZE_CALC_2]]
!CHECK: %[[DTYPE_SEGMENT_SIZE_CALC_4:.*]] = sdiv exact i64 %[[DTYPE_SEGMENT_SIZE_CALC_3]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-!CHECK: %[[DATA_CMP:.*]] = icmp eq ptr %[[ARR_OFFS]], null
+!CHECK: %[[DATA_CMP:.*]] = icmp eq ptr %[[ARR_OFFS2]], null
!CHECK: %[[DATA_SEL:.*]] = select i1 %[[DATA_CMP]], i64 0, i64 %[[ALLOCATABLE_MEMBER_SIZE_CALC_5]]
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
!CHECK: store ptr %[[ALLOCA]], ptr %[[BASE_PTR_ARR]], align 8
@@ -798,13 +782,13 @@ end subroutine mapType_common_block_members
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
!CHECK: store ptr %[[ALLOCA]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 2
-!CHECK: store ptr %[[NESTED_MEMBER_BASE_ADDR_ACCESS]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: store ptr %[[ARR_OFFS2]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 2
+!CHECK: store i64 %[[DATA_SEL]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
-!CHECK: store ptr %[[NESTED_MEMBER_BASE_ADDR_ACCESS]], ptr %[[BASE_PTR_ARR]], align 8
+!CHECK: store ptr %[[NESTED_MEMBER_ACCESS]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 3
!CHECK: store ptr %[[ARR_OFFS]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 3
-!CHECK: store i64 %[[DATA_SEL]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
!CHECK-LABEL: define {{.*}} @{{.*}}maptype_nested_derived_type_member_idx{{.*}}
!CHECK: %[[ALLOCA_0:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, align 8
@@ -845,15 +829,21 @@ end subroutine mapType_common_block_members
!CHECK: %[[ARR_OFFS:.*]] = getelementptr inbounds %_QFmaptype_nested_derived_type_member_idxTvertexes, ptr %[[LOAD_OFF_PTR]], i64 0
!CHECK: %[[LOAD_ARR_OFFS:.*]] = load ptr, ptr %[[OFF_PTR_4]], align 8
!CHECK: %[[ARR_OFFS_1:.*]] = getelementptr inbounds i32, ptr %[[LOAD_ARR_OFFS]], i64 0
+!CHECK: %[[LOAD_OFF_PTR:.*]] = load ptr, ptr %[[OFF_PTR_2]], align 8
+!CHECK: %[[ARR_OFFS_2:.*]] = getelementptr inbounds %_QFmaptype_nested_derived_type_member_idxTvertexes, ptr %[[LOAD_OFF_PTR]], i64 0
+!CHECK: %[[LOAD_ARR_OFFS:.*]] = load ptr, ptr %[[OFF_PTR_4]], align 8
+!CHECK: %[[ARR_OFFS_3:.*]] = getelementptr inbounds i32, ptr %[[LOAD_ARR_OFFS]], i64 0
!CHECK: %[[SZ_CALC_1:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, ptr %[[OFF_PTR_1]], i64 1
!CHECK: %[[SZ_CALC_2:.*]] = ptrtoint ptr %[[SZ_CALC_1]] to i64
!CHECK: %[[SZ_CALC_3:.*]] = ptrtoint ptr %[[OFF_PTR_1]] to i64
!CHECK: %[[SZ_CALC_4:.*]] = sub i64 %[[SZ_CALC_2]], %[[SZ_CALC_3]]
!CHECK: %[[SZ_CALC_5:.*]] = sdiv exact i64 %[[SZ_CALC_4]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-!CHECK: %[[SIZE_CMP:.*]] = icmp eq ptr %[[ARR_OFFS]], null
+!CHECK: %[[SIZE_CMP:.*]] = icmp eq ptr %[[ARR_OFFS_2]], null
!CHECK: %[[SIZE_SEL:.*]] = select i1 %[[SIZE_CMP]], i64 0, i64 %[[OFF_PTR_3]]
-!CHECK: %[[SIZE_CMP2:.*]] = icmp eq ptr %[[ARR_OFFS_1]], null
+!CHECK: %[[SIZE_CMP2:.*]] = icmp eq ptr %[[ARR_OFFS_3]], null
!CHECK: %[[SIZE_SEL2:.*]] = select i1 %[[SIZE_CMP2]], i64 0, i64 %[[SZ_CALC_4_2]]
+!CHECK: %[[SIZE_CMP3:.*]] = icmp eq ptr %[[ARR_OFFS]], null
+!CHECK: %[[SIZE_SEL3:.*]] = select i1 %[[SIZE_CMP3]], i64 0, i64 64
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [7 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
!CHECK: store ptr %[[BASE_PTR_1]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [7 x ptr], ptr %.offload_ptrs, i32 0, i32 0
@@ -867,32 +857,27 @@ end subroutine mapType_common_block_members
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [7 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
!CHECK: store ptr %[[BASE_PTR_1]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [7 x ptr], ptr %.offload_ptrs, i32 0, i32 2
-!CHECK: store ptr %[[OFF_PTR_2]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: store ptr %[[ARR_OFFS_2]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [7 x i64], ptr %.offload_sizes, i32 0, i32 2
+!CHECK: store i64 %[[SIZE_SEL]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [7 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
-!CHECK: store ptr %[[OFF_PTR_2]], ptr %[[BASE_PTR_ARR]], align 8
+!CHECK: store ptr %[[BASE_PTR_1]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [7 x ptr], ptr %.offload_ptrs, i32 0, i32 3
-!CHECK: store ptr %[[ARR_OFFS]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [7 x i64], ptr %.offload_sizes, i32 0, i32 3
-!CHECK: store i64 %[[SIZE_SEL]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
+!CHECK: store ptr %[[SZ_CALC_8]], ptr %[[OFFLOAD_PTR_ARR]], align 8
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [7 x ptr], ptr %.offload_baseptrs, i32 0, i32 4
!CHECK: store ptr %[[BASE_PTR_1]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [7 x ptr], ptr %.offload_ptrs, i32 0, i32 4
-!CHECK: store ptr %[[SZ_CALC_8]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: store ptr %[[ARR_OFFS_3]], ptr %[[OFFLOAD_PTR_ARR]], align 8
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [7 x ptr], ptr %.offload_baseptrs, i32 0, i32 5
-!CHECK: store ptr %[[BASE_PTR_1]], ptr %[[BASE_PTR_ARR]], align 8
+!CHECK: store ptr %[[OFF_PTR_1]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [7 x ptr], ptr %.offload_ptrs, i32 0, i32 5
-!CHECK: store ptr %[[OFF_PTR_4]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: store ptr %[[ARR_OFFS]], ptr %[[OFFLOAD_PTR_ARR]], align 8
+!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [7 x i64], ptr %.offload_sizes, i32 0, i32 5
+!CHECK: store i64 %[[SIZE_SEL3]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [7 x ptr], ptr %.offload_baseptrs, i32 0, i32 6
-!CHECK: store ptr %[[OFF_PTR_4]], ptr %[[BASE_PTR_ARR]], align 8
+!CHECK: store ptr %[[SZ_CALC_8]], ptr %[[BASE_PTR_ARR]], align 8
!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [7 x ptr], ptr %.offload_ptrs, i32 0, i32 6
!CHECK: store ptr %[[ARR_OFFS_1]], ptr %[[OFFLOAD_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [7 x i64], ptr %.offload_sizes, i32 0, i32 6
-!CHECK: store i64 %[[SIZE_SEL2]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
-!CHECK-LABEL: define {{.*}} @{{.*}}maptype_common_block_{{.*}}
-!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [1 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
-!CHECK: store ptr @var_common_, ptr %[[BASE_PTR_ARR]], align 8
-!CHECK: %[[OFFLOAD_PTR_ARR:.*]] = getelementptr inbounds [1 x ptr], ptr %.offload_ptrs, i32 0, i32 0
-!CHECK: store ptr @var_common_, ptr %[[OFFLOAD_PTR_ARR]], align 8
!CHECK-LABEL: define {{.*}} @{{.*}}maptype_common_block_members_{{.*}}
!CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
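To make the @.offload_maptypes changes above easier to audit, here is a small standalone decoder (not part of the patch) for the 64-bit map-type words, using the low-bit values of llvm::omp::OpenMPOffloadMappingFlags; treating 0x4000 as the attach bit is an assumption read off the 16384 entries in the updated CHECK lines:

  #include <cstdint>
  #include <cstdio>

  int main() {
    // 281474976710675 is a pre-patch combined word; 515 and 16384 are the
    // post-patch data and attach entries from the CHECK lines above.
    const uint64_t words[] = {281474976710675ULL, 515ULL, 16384ULL};
    for (uint64_t w : words) {
      // MEMBER_OF lives in the high 16 bits; the rest are modifier bits.
      std::printf("%llu: MEMBER_OF(%llu)", (unsigned long long)w,
                  (unsigned long long)(w >> 48));
      if (w & 0x001) std::printf(" | TO");
      if (w & 0x002) std::printf(" | FROM");
      if (w & 0x004) std::printf(" | ALWAYS");
      if (w & 0x010) std::printf(" | PTR_AND_OBJ");
      if (w & 0x020) std::printf(" | TARGET_PARAM");
      if (w & 0x200) std::printf(" | IMPLICIT");
      if (w & 0x4000) std::printf(" | ATTACH (assumed bit)");
      std::printf("\n");
    }
    return 0;
  }

Decoded this way, 281474976710675 is MEMBER_OF(1) | TO | FROM | PTR_AND_OBJ, while the new pair 515 and 16384 reads as TO | FROM | IMPLICIT followed by a bare attach entry.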
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index c72a93bfe92df..a7bf028b308ba 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -4370,6 +4370,11 @@ static llvm::Value *getSizeInBytes(DataLayout &dl, const mlir::Type &type,
return builder.getInt64(dl.getTypeSizeInBits(type) / 8);
}
+static bool checkHasClauseMapFlag(omp::ClauseMapFlags flag,
+ omp::ClauseMapFlags checkFlag) {
+ return (flag & checkFlag) == checkFlag;
+}
+
// Convert the MLIR map flag set to the runtime map flag set for embedding
// in LLVM-IR. This is important as the two bit-flag lists do not correspond
// 1-to-1, as there are flags the runtime doesn't care about and vice versa.
@@ -4386,40 +4391,40 @@ convertClauseMapFlags(omp::ClauseMapFlags mlirFlags) {
llvm::omp::OpenMPOffloadMappingFlags mapType =
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE;
- if (mapTypeToBool(omp::ClauseMapFlags::to))
+ if (checkHasClauseMapFlag(mlirFlags, omp::ClauseMapFlags::to))
mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
- if (mapTypeToBool(omp::ClauseMapFlags::from))
+ if (checkHasClauseMapFlag(mlirFlags, omp::ClauseMapFlags::from))
mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
- if (mapTypeToBool(omp::ClauseMapFlags::always))
+ if (checkHasClauseMapFlag(mlirFlags, omp::ClauseMapFlags::always))
mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
- if (mapTypeToBool(omp::ClauseMapFlags::del))
+ if (checkHasClauseMapFlag(mlirFlags, omp::ClauseMapFlags::del))
mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
- if (mapTypeToBool(omp::ClauseMapFlags::return_param))
+ if (checkHasClauseMapFlag(mlirFlags, omp::ClauseMapFlags::return_param))
mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
- if (mapTypeToBool(omp::ClauseMapFlags::priv))
+ if (checkHasClauseMapFlag(mlirFlags, omp::ClauseMapFlags::priv))
mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE;
- if (mapTypeToBool(omp::ClauseMapFlags::literal))
+ if (checkHasClauseMapFlag(mlirFlags, omp::ClauseMapFlags::literal))
mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
- if (mapTypeToBool(omp::ClauseMapFlags::implicit))
+ if (checkHasClauseMapFlag(mlirFlags, omp::ClauseMapFlags::implicit))
mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
- if (mapTypeToBool(omp::ClauseMapFlags::close))
+ if (checkHasClauseMapFlag(mlirFlags, omp::ClauseMapFlags::close))
mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
- if (mapTypeToBool(omp::ClauseMapFlags::present))
+ if (checkHasClauseMapFlag(mlirFlags, omp::ClauseMapFlags::present))
mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
- if (mapTypeToBool(omp::ClauseMapFlags::ompx_hold))
+ if (checkHasClauseMapFlag(mlirFlags, omp::ClauseMapFlags::ompx_hold))
mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
- if (mapTypeToBool(omp::ClauseMapFlags::attach))
+ if (checkHasClauseMapFlag(mlirFlags, omp::ClauseMapFlags::attach))
mapType |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ATTACH;
if (mapTypeToBool(omp::ClauseMapFlags::is_device_ptr)) {
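One subtlety worth calling out in the helper introduced above: (flag & checkFlag) == checkFlag tests that all bits of checkFlag are present, which becomes meaningful once multi-bit flag values such as ref_ptr_ptee are queried. A minimal compile-time sketch with illustrative enumerator values (the dialect's real values may differ, and treating ref_ptr_ptee as literally the union of the two bits is an assumption here):

  #include <cstdint>
  enum class ClauseMapFlags : uint64_t {
    ref_ptr = 0x1,
    ref_ptee = 0x2,
    ref_ptr_ptee = 0x3, // illustrative: union of the two bits above
  };
  constexpr ClauseMapFlags operator&(ClauseMapFlags a, ClauseMapFlags b) {
    return static_cast<ClauseMapFlags>(static_cast<uint64_t>(a) &
                                       static_cast<uint64_t>(b));
  }
  constexpr bool checkHasClauseMapFlag(ClauseMapFlags flag,
                                       ClauseMapFlags checkFlag) {
    return (flag & checkFlag) == checkFlag;
  }
  // A map tagged only ref_ptr does not satisfy a ref_ptr_ptee query,
  // but a ref_ptr_ptee map satisfies each single-bit query.
  static_assert(!checkHasClauseMapFlag(ClauseMapFlags::ref_ptr,
                                       ClauseMapFlags::ref_ptr_ptee),
                "all bits required");
  static_assert(checkHasClauseMapFlag(ClauseMapFlags::ref_ptr_ptee,
                                      ClauseMapFlags::ref_ptee),
                "subset check");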
@@ -4437,6 +4442,16 @@ static void collectMapDataFromMapOperands(
llvm::IRBuilderBase &builder, ArrayRef<Value> useDevPtrOperands = {},
ArrayRef<Value> useDevAddrOperands = {},
ArrayRef<Value> hasDevAddrOperands = {}) {
+
+ auto checkRefPtrOrPteeMapWithAttach = [](omp::ClauseMapFlags mapType) {
+ bool hasRefType =
+ checkHasClauseMapFlag(mapType, omp::ClauseMapFlags::ref_ptr) ||
+ checkHasClauseMapFlag(mapType, omp::ClauseMapFlags::ref_ptee) ||
+ checkHasClauseMapFlag(mapType, omp::ClauseMapFlags::ref_ptr_ptee);
+ return hasRefType &&
+ checkHasClauseMapFlag(mapType, omp::ClauseMapFlags::attach);
+ };
+
auto checkIsAMember = [](const auto &mapVars, auto mapOp) {
// Check if this is a member mapping and, if it is a member of a larger
// object, mark it as such.
@@ -4457,10 +4472,16 @@ static void collectMapDataFromMapOperands(
// Process MapOperands
for (Value mapValue : mapVars) {
auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
- Value offloadPtr =
- mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr() : mapOp.getVarPtr();
+ bool isRefPtrOrPteeMapWithAttach =
+ checkRefPtrOrPteeMapWithAttach(mapOp.getMapType());
+ Value offloadPtr = (mapOp.getVarPtrPtr() && !isRefPtrOrPteeMapWithAttach)
+ ? mapOp.getVarPtrPtr()
+ : mapOp.getVarPtr();
mapData.OriginalValue.push_back(moduleTranslation.lookupValue(offloadPtr));
- mapData.Pointers.push_back(mapData.OriginalValue.back());
+ mapData.Pointers.push_back(
+ isRefPtrOrPteeMapWithAttach
+ ? moduleTranslation.lookupValue(mapOp.getVarPtrPtr())
+ : mapData.OriginalValue.back());
if (llvm::Value *refPtr =
getRefPtrIfDeclareTarget(offloadPtr, moduleTranslation)) {
@@ -4477,12 +4498,22 @@ static void collectMapDataFromMapOperands(
// In every situation we currently have, if a varPtrPtr is present we
// wish to utilise its type for the base type; the main cases are
// currently Fortran descriptor base address maps and attach maps.
- mlir::Type baseTy = mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtrType().value()
- : mapOp.getVarPtrType();
- mapData.BaseType.push_back(moduleTranslation.convertType(baseTy));
- mapData.Sizes.push_back(
- getSizeInBytes(dl, baseTy, mapOp, mapData.Pointers.back(),
- mapData.BaseType.back(), builder, moduleTranslation));
+ mapData.BaseType.push_back(moduleTranslation.convertType(
+ mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtrType().value()
+ : mapOp.getVarPtrType()));
+
+ // For the attach map cases, it's a little odd, as we effectively have to
+ // utilise the base address (including all bounds offsets) for the pointer
+ // field, the pointer address for the base address field, and the size of
+ // the pointer rather than the data (base address). So we end up with a mix
+ // of base types and sizes we wish to insert here.
+ mlir::Type sizeType = (isRefPtrOrPteeMapWithAttach || !mapOp.getVarPtrPtr())
+ ? mapOp.getVarPtrType()
+ : mapOp.getVarPtrPtrType().value();
+ mapData.Sizes.push_back(getSizeInBytes(
+ dl, sizeType, isRefPtrOrPteeMapWithAttach ? nullptr : mapOp,
+ mapData.Pointers.back(), moduleTranslation.convertType(sizeType),
+ builder, moduleTranslation));
mapData.MapClause.push_back(mapOp.getOperation());
mapData.Types.push_back(convertClauseMapFlags(mapOp.getMapType()));
mapData.Names.push_back(LLVM::createMappingInformation(
@@ -4499,11 +4530,18 @@ static void collectMapDataFromMapOperands(
}
auto findMapInfo = [&mapData](llvm::Value *val,
- llvm::OpenMPIRBuilder::DeviceInfoTy devInfoTy) {
+ llvm::OpenMPIRBuilder::DeviceInfoTy devInfoTy,
+ size_t memberCount) {
unsigned index = 0;
bool found = false;
for (llvm::Value *basePtr : mapData.OriginalValue) {
- if (basePtr == val && mapData.IsAMapping[index]) {
+ auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[index]);
+ // TODO/FIXME: Currently we define an equivalent mapping as
+ // the same base pointer and an equivalent member count, but
+ // that is a loose definition; we may have to extend it to check
+ // other fields (varPtrPtr/individual members being mapped)
+ if (basePtr == val && mapData.IsAMapping[index] &&
+ memberCount == mapOp.getMembers().size()) {
found = true;
mapData.Types[index] |=
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
@@ -4524,15 +4562,31 @@ static void collectMapDataFromMapOperands(
llvm::Value *origValue = moduleTranslation.lookupValue(offloadPtr);
// Check if map info is already present for this entry.
- if (!findMapInfo(origValue, devInfoTy)) {
+ if (!findMapInfo(origValue, devInfoTy, mapOp.getMembers().size())) {
mapData.OriginalValue.push_back(origValue);
mapData.Pointers.push_back(mapData.OriginalValue.back());
mapData.IsDeclareTarget.push_back(false);
mapData.BasePointers.push_back(mapData.OriginalValue.back());
- mapData.BaseType.push_back(moduleTranslation.convertType(
- mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtrType().value()
- : mapOp.getVarPtrType()));
- mapData.Sizes.push_back(builder.getInt64(0));
+ mlir::Type baseTy = mapOp.getVarPtrPtr()
+ ? mapOp.getVarPtrPtrType().value()
+ : mapOp.getVarPtrType();
+ mapData.BaseType.push_back(moduleTranslation.convertType(baseTy));
+
+ // If this is an attach map, we currently need to maintain the size even
+ // if we're not sending data, as the runtime (at least currently)
+ // expects a size greater than 0. An alternative may be to skip attach
+ // maps when they're applied to use_dev_ptr/addr and no other map type
+ // is present.
+ if ((convertClauseMapFlags(mapOp.getMapType()) &
+ llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ATTACH) ==
+ llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ATTACH) {
+ mapData.Sizes.push_back(getSizeInBytes(
+ dl, baseTy, mapOp, mapData.Pointers.back(),
+ mapData.BaseType.back(), builder, moduleTranslation));
+ } else {
+ mapData.Sizes.push_back(builder.getInt64(0));
+ }
+
mapData.MapClause.push_back(mapOp.getOperation());
mapData.Types.push_back(
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
@@ -4829,6 +4883,93 @@ static bool checkIfPointerMap(omp::MapInfoOp mapOp) {
return false;
}
+static void
+processIndividualMap(llvm::IRBuilderBase &builder,
+ llvm::OpenMPIRBuilder &ompBuilder, MapInfoData &mapData,
+ size_t mapDataIdx, MapInfosTy &combinedInfo,
+ TargetDirectiveEnumTy targetDirective,
+ llvm::omp::OpenMPOffloadMappingFlags memberOfFlag =
+ llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE,
+ bool isTargetParam = true, int mapDataParentIdx = -1) {
+ auto mapFlag = mapData.Types[mapDataIdx];
+ auto mapInfoOp = llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIdx]);
+
+ bool isPtrTy = checkIfPointerMap(mapInfoOp);
+ bool isAttachMap = ((convertClauseMapFlags(mapInfoOp.getMapType()) &
+ llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ATTACH) ==
+ llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ATTACH);
+
+ // Declare Target mappings are excluded from being marked as
+ // OMP_MAP_TARGET_PARAM as they are not passed as parameters; they're
+ // marked with OMP_MAP_PTR_AND_OBJ instead. This is also the only type of
+ // mapping that still utilises OMP_MAP_PTR_AND_OBJ after moving to
+ // attach map semantics.
+ if (isPtrTy && mapData.IsDeclareTarget[mapDataIdx])
+ mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
+
+ // Declare target variables are not passed to the kernel, and for the
+ // moment neither are attach maps. However, it is possible to create
+ // attach maps that transfer data and can thus be kernel arguments; our
+ // existing frontend does not do this.
+ if (isTargetParam &&
+ (targetDirective == TargetDirectiveEnumTy::Target &&
+ !mapData.IsDeclareTarget[mapDataIdx]) &&
+ !isAttachMap)
+ mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
+
+ if (mapInfoOp.getMapCaptureType() == omp::VariableCaptureKind::ByCopy &&
+ !isPtrTy)
+ mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
+
+ // If we have a pointer and it's part of a MEMBER_OF mapping, we do not
+ // apply MEMBER_OF, as the runtime currently has a work-around that
+ // utilises MEMBER_OF (rather than TARGET_PARAM) to prevent reference
+ // updating in certain scenarios. However, this causes a noticeable issue
+ // in cases where we map some data (primarily Fortran descriptors at the
+ // moment), alter it on the host, and then expect it to not be updated in
+ // a subsequent implicit map (such as an implicit map on a target).
+ if (memberOfFlag != llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE) {
+ if (!isPtrTy && !isAttachMap)
+ ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
+
+ // The return parameter should be the over-riding parent in cases where we
+ // have a return parameter that is echoed to all members, the main case of
+ // this currently is with fortran descriptors. It may need more finessing
+ // for C/C++ in the future or descriptors that are members of derived
+ // types.
+ mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
+ }
+
+ // If we're provided a mapDataParentIdx, then the data being mapped is
+ // part of a larger object (in a parent <-> member mapping) and in this
+ // case our BasePointer should be the parent, except in the edge case
+ // where we are mapping pointee data. In that case we try to stay close
+ // to what Clang currently does and utilise the regular base pointer of
+ // the data.
+ if (mapDataParentIdx >= 0 &&
+ !(checkHasClauseMapFlag(mapInfoOp.getMapType(),
+ omp::ClauseMapFlags::ref_ptee) ||
+ (checkHasClauseMapFlag(mapInfoOp.getMapType(),
+ omp::ClauseMapFlags::ref_ptr_ptee) &&
+ isPtrTy))) {
+ combinedInfo.BasePointers.emplace_back(
+ mapData.BasePointers[mapDataParentIdx]);
+ } else {
+ combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIdx]);
+ }
+
+ combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIdx]);
+ combinedInfo.DevicePointers.emplace_back(mapData.DevicePointers[mapDataIdx]);
+ combinedInfo.Mappers.emplace_back(mapData.Mappers[mapDataIdx]);
+ combinedInfo.Names.emplace_back(mapData.Names[mapDataIdx]);
+ combinedInfo.Types.emplace_back(mapFlag);
+ combinedInfo.Sizes.emplace_back(
+ isPtrTy ? builder.CreateSelect(
+ builder.CreateIsNull(mapData.Pointers[mapDataIdx]),
+ builder.getInt64(0), mapData.Sizes[mapDataIdx])
+ : mapData.Sizes[mapDataIdx]);
+}
+
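Note the Sizes entry at the tail of processIndividualMap: for pointer maps it wraps the size in a null check, which is where the SEL_SZ select patterns in the CHECK lines above come from. In plain C++ terms, a sketch of the behaviour the emitted IR has at runtime (hypothetical helper, not patch code):

  #include <cstdint>
  // Mirrors: select(icmp eq ptr, null), i64 0, i64 size
  static uint64_t guardedTransferSize(const void *ptr, uint64_t size) {
    // An unallocated Fortran pointer/allocatable transfers zero bytes.
    return ptr ? size : 0;
  }

This keeps the runtime from copying through a base address that was never allocated.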
// This creates two insertions into the MapInfosTy data structure for the
// "parent" of a set of members, (usually a container e.g.
// class/structure/derived type) when subsequent members have also been
@@ -4844,38 +4985,42 @@ static bool checkIfPointerMap(omp::MapInfoOp mapOp) {
//
// This function borrows a lot from Clang's emitCombinedEntry function
// inside of CGOpenMPRuntime.cpp
-static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
+static void mapParentWithMembers(
LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo,
MapInfoData &mapData, uint64_t mapDataIndex,
+ llvm::omp::OpenMPOffloadMappingFlags memberOfFlag,
TargetDirectiveEnumTy targetDirective) {
+ using mapFlags = llvm::omp::OpenMPOffloadMappingFlags;
assert(!ompBuilder.Config.isTargetDevice() &&
"function only supported for host device codegen");
-
auto parentClause =
llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
-
auto *parentMapper = mapData.Mappers[mapDataIndex];
// Map the first segment of the parent. If a user-defined mapper is attached,
// include the parent's to/from-style bits (and common modifiers) in this
// base entry so the mapper receives correct copy semantics via its 'type'
// parameter. Also keep TARGET_PARAM when required for kernel arguments.
- llvm::omp::OpenMPOffloadMappingFlags baseFlag =
- (targetDirective == TargetDirectiveEnumTy::Target &&
- !mapData.IsDeclareTarget[mapDataIndex])
- ? llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM
- : llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE;
+ mapFlags baseFlag = (targetDirective == TargetDirectiveEnumTy::Target &&
+ !mapData.IsDeclareTarget[mapDataIndex])
+ ? mapFlags::OMP_MAP_TARGET_PARAM
+ : mapFlags::OMP_MAP_NONE;
if (parentMapper) {
- using mapFlags = llvm::omp::OpenMPOffloadMappingFlags;
// Preserve relevant map-type bits from the parent clause. These include
// the copy direction (TO/FROM), as well as commonly used modifiers that
// should be visible to the mapper for correct behaviour.
mapFlags parentFlags = mapData.Types[mapDataIndex];
mapFlags preserve = mapFlags::OMP_MAP_TO | mapFlags::OMP_MAP_FROM |
mapFlags::OMP_MAP_ALWAYS | mapFlags::OMP_MAP_CLOSE |
- mapFlags::OMP_MAP_PRESENT | mapFlags::OMP_MAP_OMPX_HOLD;
+ mapFlags::OMP_MAP_PRESENT |
+ mapFlags::OMP_MAP_OMPX_HOLD |
+ mapFlags::OMP_MAP_IMPLICIT;
+ baseFlag |= (parentFlags & preserve);
+ } else {
+ mapFlags parentFlags = mapData.Types[mapDataIndex];
+ mapFlags preserve = mapFlags::OMP_MAP_PRESENT;
baseFlag |= (parentFlags & preserve);
}
@@ -4911,15 +5056,32 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
auto mapOp = dyn_cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
int firstMemberIdx = getMapDataMemberIdx(
mapData, getFirstOrLastMappedMemberPtr(mapOp, true));
- lowAddr = builder.CreatePointerCast(mapData.Pointers[firstMemberIdx],
+ lowAddr = builder.CreatePointerCast(mapData.BasePointers[firstMemberIdx],
builder.getPtrTy());
+
int lastMemberIdx = getMapDataMemberIdx(
mapData, getFirstOrLastMappedMemberPtr(mapOp, false));
+ auto lastMemberMapInfo =
+ cast<omp::MapInfoOp>(mapData.MapClause[lastMemberIdx]);
+
+ // NOTE: Currently, for RefPtee the BaseType is set to the varPtrPtr
+ // field, which is the pointer data's type and not the member within the
+ // structure that it's part of, so we have to make sure we use the member
+ // type in this case when calculating the parent's size offsets.
+ // TODO: It may be good to extend MapInfoData to track both the VarPtr
+ // and VarPtrPtr BaseTypes, to more consistently distinguish which is
+ // being used.
+ bool isRefPteeMap = checkHasClauseMapFlag(lastMemberMapInfo.getMapType(),
+ omp::ClauseMapFlags::ref_ptee);
+ llvm::Type *castType = mapData.BaseType[lastMemberIdx];
+ if (isRefPteeMap)
+ castType =
+ moduleTranslation.convertType(lastMemberMapInfo.getVarPtrType());
highAddr = builder.CreatePointerCast(
- builder.CreateGEP(mapData.BaseType[lastMemberIdx],
- mapData.Pointers[lastMemberIdx], builder.getInt64(1)),
+ builder.CreateGEP(castType, mapData.BasePointers[lastMemberIdx],
+ builder.getInt64(1)),
builder.getPtrTy());
- combinedInfo.Pointers.emplace_back(mapData.Pointers[firstMemberIdx]);
+ combinedInfo.Pointers.emplace_back(mapData.BasePointers[firstMemberIdx]);
}
llvm::Value *size = builder.CreateIntCast(
@@ -4928,9 +5090,6 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
/*isSigned=*/false);
combinedInfo.Sizes.push_back(size);
- llvm::omp::OpenMPOffloadMappingFlags memberOfFlag =
- ompBuilder.getMemberOfFlag(combinedInfo.BasePointers.size() - 1);
-
// This creates the initial MEMBER_OF mapping that consists of
// the parent/top level container (same as above effectively, except
// with a fixed initial compile time size and separate maptype which
@@ -4942,10 +5101,9 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
// for the map flags that Clang currently supports (e.g. it should do some
// further case specific flag modifications). For the moment, it handles
// what we support as expected.
- llvm::omp::OpenMPOffloadMappingFlags mapFlag = mapData.Types[mapDataIndex];
- bool hasMapClose = (llvm::omp::OpenMPOffloadMappingFlags(mapFlag) &
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE) ==
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
+ mapFlags mapFlag = mapData.Types[mapDataIndex];
+ bool hasMapClose = (mapFlags(mapFlag) & mapFlags::OMP_MAP_CLOSE) ==
+ mapFlags::OMP_MAP_CLOSE;
ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
if (targetDirective == TargetDirectiveEnumTy::TargetUpdate || hasMapClose) {
@@ -4980,7 +5138,7 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
// It appears to be an optimisation rather than a necessity though,
// but this requires further investigation. However, we would have to make
// sure to not exclude maps with bounds that ARE pointers, as these are
// processed as separate components, i.e. pointer + data.
for (auto v : overlapIdxs) {
auto mapDataOverlapIdx = getMapDataMemberIdx(
mapData,
@@ -5021,130 +5179,6 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
builder.getInt64Ty(), true));
}
}
- return memberOfFlag;
-}
-
-// This function is intended to add explicit mappings of members
-static void processMapMembersWithParent(
- LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
- llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo,
- MapInfoData &mapData, uint64_t mapDataIndex,
- llvm::omp::OpenMPOffloadMappingFlags memberOfFlag,
- TargetDirectiveEnumTy targetDirective) {
- assert(!ompBuilder.Config.isTargetDevice() &&
- "function only supported for host device codegen");
-
- auto parentClause =
- llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
-
- for (auto mappedMembers : parentClause.getMembers()) {
- auto memberClause =
- llvm::cast<omp::MapInfoOp>(mappedMembers.getDefiningOp());
- int memberDataIdx = getMapDataMemberIdx(mapData, memberClause);
-
- assert(memberDataIdx >= 0 && "could not find mapped member of structure");
-
- // If we're currently mapping a pointer to a block of data, we must
- // initially map the pointer, and then attatch/bind the data with a
- // subsequent map to the pointer. This segment of code generates the
- // pointer mapping, which can in certain cases be optimised out as Clang
- // currently does in its lowering. However, for the moment we do not do so,
- // in part as we currently have substantially less information on the data
- // being mapped at this stage.
- if (checkIfPointerMap(memberClause)) {
- auto mapFlag = convertClauseMapFlags(memberClause.getMapType());
- mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
- mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
- ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
- combinedInfo.Types.emplace_back(mapFlag);
- combinedInfo.DevicePointers.emplace_back(
- llvm::OpenMPIRBuilder::DeviceInfoTy::None);
- combinedInfo.Mappers.emplace_back(nullptr);
- combinedInfo.Names.emplace_back(
- LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
- combinedInfo.BasePointers.emplace_back(
- mapData.BasePointers[mapDataIndex]);
- combinedInfo.Pointers.emplace_back(mapData.BasePointers[memberDataIdx]);
- combinedInfo.Sizes.emplace_back(builder.getInt64(
- moduleTranslation.getLLVMModule()->getDataLayout().getPointerSize()));
- }
-
- // Same MemberOfFlag to indicate its link with parent and other members
- // of.
- auto mapFlag = convertClauseMapFlags(memberClause.getMapType());
- mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
- mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
- ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
- bool isDeclTargetTo = isDeclareTargetTo(parentClause.getVarPtr()
- ? parentClause.getVarPtr()
- : parentClause.getVarPtrPtr());
- if (checkIfPointerMap(memberClause) &&
- (!isDeclTargetTo ||
- (targetDirective != TargetDirectiveEnumTy::TargetUpdate &&
- targetDirective != TargetDirectiveEnumTy::TargetData))) {
- mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
- }
-
- combinedInfo.Types.emplace_back(mapFlag);
- combinedInfo.DevicePointers.emplace_back(
- mapData.DevicePointers[memberDataIdx]);
- combinedInfo.Mappers.emplace_back(mapData.Mappers[memberDataIdx]);
- combinedInfo.Names.emplace_back(
- LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
- uint64_t basePointerIndex =
- checkIfPointerMap(memberClause) ? memberDataIdx : mapDataIndex;
- combinedInfo.BasePointers.emplace_back(
- mapData.BasePointers[basePointerIndex]);
- combinedInfo.Pointers.emplace_back(mapData.Pointers[memberDataIdx]);
-
- llvm::Value *size = mapData.Sizes[memberDataIdx];
- if (checkIfPointerMap(memberClause)) {
- size = builder.CreateSelect(
- builder.CreateIsNull(mapData.Pointers[memberDataIdx]),
- builder.getInt64(0), size);
- }
-
- combinedInfo.Sizes.emplace_back(size);
- }
-}
-
-static void processIndividualMap(MapInfoData &mapData, size_t mapDataIdx,
- MapInfosTy &combinedInfo,
- TargetDirectiveEnumTy targetDirective,
- int mapDataParentIdx = -1) {
- // Declare Target Mappings are excluded from being marked as
- // OMP_MAP_TARGET_PARAM as they are not passed as parameters, they're
- // marked with OMP_MAP_PTR_AND_OBJ instead.
- auto mapFlag = mapData.Types[mapDataIdx];
- auto mapInfoOp = llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIdx]);
-
- bool isPtrTy = checkIfPointerMap(mapInfoOp);
- if (isPtrTy)
- mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
-
- if (targetDirective == TargetDirectiveEnumTy::Target &&
- !mapData.IsDeclareTarget[mapDataIdx])
- mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
-
- if (mapInfoOp.getMapCaptureType() == omp::VariableCaptureKind::ByCopy &&
- !isPtrTy)
- mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
-
- // if we're provided a mapDataParentIdx, then the data being mapped is
- // part of a larger object (in a parent <-> member mapping) and in this
- // case our BasePointer should be the parent.
- if (mapDataParentIdx >= 0)
- combinedInfo.BasePointers.emplace_back(
- mapData.BasePointers[mapDataParentIdx]);
- else
- combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIdx]);
-
- combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIdx]);
- combinedInfo.DevicePointers.emplace_back(mapData.DevicePointers[mapDataIdx]);
- combinedInfo.Mappers.emplace_back(mapData.Mappers[mapDataIdx]);
- combinedInfo.Names.emplace_back(mapData.Names[mapDataIdx]);
- combinedInfo.Types.emplace_back(mapFlag);
- combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIdx]);
}
static void processMapWithMembersOf(LLVM::ModuleTranslation &moduleTranslation,
@@ -5176,18 +5210,41 @@ static void processMapWithMembersOf(LLVM::ModuleTranslation &moduleTranslation,
// Clang maps array without bounds as pointers (which we do not
// currently do), whereas we treat them as arrays in all cases
// currently.
- processIndividualMap(mapData, memberDataIdx, combinedInfo, targetDirective,
- mapDataIndex);
+ processIndividualMap(builder, ompBuilder, mapData, memberDataIdx,
+ combinedInfo, targetDirective,
+ llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE,
+ true, mapDataIndex);
return;
}
- llvm::omp::OpenMPOffloadMappingFlags memberOfParentFlag =
+ auto collectMapInfoIdxs =
+ [&](llvm::SmallVectorImpl<int64_t> &mapsAndInfoIdx) {
+ auto parentClause =
+ llvm::cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
+ mapsAndInfoIdx.push_back(getMapDataMemberIdx(mapData, parentClause));
+ for (auto member : parentClause.getMembers())
+ mapsAndInfoIdx.push_back(getMapDataMemberIdx(
+ mapData, llvm::cast<omp::MapInfoOp>(member.getDefiningOp())));
+ };
+
+ llvm::SmallVector<int64_t> mapInfoIdx;
+ collectMapInfoIdxs(mapInfoIdx);
+
+ llvm::omp::OpenMPOffloadMappingFlags memberOfFlag =
+ ompBuilder.getMemberOfFlag(combinedInfo.Types.size());
+ for (size_t i = 0; i < mapInfoIdx.size(); i++) {
+    // Index == 0 is the parent map; if it reaches this point it's an
+    // unattachable type and should get OMP_MAP_TARGET_PARAM and no MEMBER_OF
+    // flag.
+ if (i == 0) {
mapParentWithMembers(moduleTranslation, builder, ompBuilder, dl,
- combinedInfo, mapData, mapDataIndex,
+ combinedInfo, mapData, mapInfoIdx[i], memberOfFlag,
targetDirective);
- processMapMembersWithParent(moduleTranslation, builder, ompBuilder, dl,
- combinedInfo, mapData, mapDataIndex,
- memberOfParentFlag, targetDirective);
+ } else {
+ processIndividualMap(builder, ompBuilder, mapData, mapInfoIdx[i],
+ combinedInfo, targetDirective, memberOfFlag, false,
+ mapDataIndex);
+ }
+ }
}
// This is a variation on Clang's GenerateOpenMPCapturedVars, which
@@ -5201,9 +5258,17 @@ createAlteredByCaptureMap(MapInfoData &mapData,
assert(!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() &&
"function only supported for host device codegen");
for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
- // if it's declare target, skip it, it's handled separately.
- if (!mapData.IsDeclareTarget[i]) {
- auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
+ auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
+ bool isAttachMap =
+ ((convertClauseMapFlags(mapOp.getMapType()) &
+ llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ATTACH) ==
+ llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ATTACH);
+
+    // If it's declare target, skip it; it's handled separately. However, if
+    // it's declare target and an attach map, we want to calculate the exact
+    // address offset so that we attach correctly.
+ if (!mapData.IsDeclareTarget[i] ||
+ (mapData.IsDeclareTarget[i] && isAttachMap)) {
omp::VariableCaptureKind captureKind = mapOp.getMapCaptureType();
bool isPtrTy = checkIfPointerMap(mapOp);
@@ -5290,8 +5355,6 @@ static void genMapInfos(llvm::IRBuilderBase &builder,
// utilise the size from any component of MapInfoData, if we can't
// something is missing from the initial MapInfoData construction.
for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
- // NOTE/TODO: We currently do not support arbitrary depth record
- // type mapping.
if (mapData.IsAMember[i])
continue;
@@ -5302,7 +5365,8 @@ static void genMapInfos(llvm::IRBuilderBase &builder,
continue;
}
- processIndividualMap(mapData, i, combinedInfo, targetDirective);
+ processIndividualMap(builder, *ompBuilder, mapData, i, combinedInfo,
+ targetDirective);
}
}
@@ -5845,40 +5909,43 @@ handleDeclareTargetMapVar(MapInfoData &mapData,
// function to link the two variables in the runtime and then both the
// reference pointer and the pointer are assigned in the kernel argument
// structure for the host.
- if (mapData.IsDeclareTarget[i]) {
- // If the original map value is a constant, then we have to make sure all
- // of it's uses within the current kernel/function that we are going to
- // rewrite are converted to instructions, as we will be altering the old
- // use (OriginalValue) from a constant to an instruction, which will be
- // illegal and ICE the compiler if the user is a constant expression of
- // some kind e.g. a constant GEP.
- if (auto *constant = dyn_cast<llvm::Constant>(mapData.OriginalValue[i]))
- convertUsersOfConstantsToInstructions(constant, func, false);
-
- // The users iterator will get invalidated if we modify an element,
- // so we populate this vector of uses to alter each user on an
- // individual basis to emit its own load (rather than one load for
- // all).
- llvm::SmallVector<llvm::User *> userVec;
- for (llvm::User *user : mapData.OriginalValue[i]->users())
- userVec.push_back(user);
-
- for (llvm::User *user : userVec) {
- if (auto *insn = dyn_cast<llvm::Instruction>(user)) {
- if (insn->getFunction() == func) {
- auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
- llvm::Value *substitute = mapData.BasePointers[i];
- if (isDeclareTargetLink(mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr()
- : mapOp.getVarPtr())) {
- builder.SetCurrentDebugLocation(insn->getDebugLoc());
- substitute = builder.CreateLoad(
- mapData.BasePointers[i]->getType(), mapData.BasePointers[i]);
- cast<llvm::LoadInst>(substitute)->moveBefore(insn->getIterator());
- }
- user->replaceUsesOfWith(mapData.OriginalValue[i], substitute);
- }
- }
+ if (!mapData.IsDeclareTarget[i])
+ continue;
+    // If the original map value is a constant, then we have to make sure all
+    // of its uses within the current kernel/function that we are going to
+    // rewrite are converted to instructions, as we will be altering the old
+    // use (OriginalValue) from a constant to an instruction, which will be
+    // illegal and would ICE the compiler if the user is a constant expression
+    // of some kind, e.g. a constant GEP.
+ if (auto *constant = dyn_cast<llvm::Constant>(mapData.OriginalValue[i]))
+ convertUsersOfConstantsToInstructions(constant, func, false);
+
+ // The users iterator will get invalidated if we modify an element,
+ // so we populate this vector of uses to alter each user on an
+ // individual basis to emit its own load (rather than one load for
+ // all).
+ llvm::SmallVector<llvm::User *> userVec;
+ for (llvm::User *user : mapData.OriginalValue[i]->users())
+ userVec.push_back(user);
+
+ for (llvm::User *user : userVec) {
+ auto *insn = dyn_cast<llvm::Instruction>(user);
+ if (!insn || insn->getFunction() != func)
+ continue;
+ auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
+ llvm::Value *substitute = mapData.BasePointers[i];
+ if (isDeclareTargetLink(mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr()
+ : mapOp.getVarPtr()) ||
+ (isDeclareTargetTo(mapOp.getVarPtrPtr() ? mapOp.getVarPtrPtr()
+ : mapOp.getVarPtr()) &&
+ moduleTranslation.getOpenMPBuilder()
+ ->Config.hasRequiresUnifiedSharedMemory())) {
+ builder.SetCurrentDebugLocation(insn->getDebugLoc());
+ substitute = builder.CreateLoad(mapData.BasePointers[i]->getType(),
+ mapData.BasePointers[i]);
+ cast<llvm::LoadInst>(substitute)->moveBefore(insn->getIterator());
}
+ user->replaceUsesOfWith(mapData.OriginalValue[i], substitute);
}
}
}
@@ -6621,13 +6688,18 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
}
for (size_t i = 0, e = mapData.OriginalValue.size(); i != e; ++i) {
- // declare target arguments are not passed to kernels as arguments
+ // 1) Declare target arguments are not passed to kernels as arguments
+ // 2) Attach maps are not passed in as arguments to kernels
+ // 3) Children of record objects are not passed in as arguments
// TODO: We currently do not handle cases where a member is explicitly
    // passed in as an argument; this will likely need to be handled in
    // the near future. Rather than using IsAMember, it may be better to
// test if the relevant BlockArg is used within the target region and
// then use that as a basis for exclusion in the kernel inputs.
- if (!mapData.IsDeclareTarget[i] && !mapData.IsAMember[i])
+ bool isAttachMap = (mapData.Types[i] &
+ llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ATTACH) ==
+ llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ATTACH;
+ if (!mapData.IsDeclareTarget[i] && !mapData.IsAMember[i] && !isAttachMap)
kernelInput.push_back(mapData.OriginalValue[i]);
}
diff --git a/mlir/test/Target/LLVMIR/allocatable_gpu_reduction_teams.mlir b/mlir/test/Target/LLVMIR/allocatable_gpu_reduction_teams.mlir
index e0471f6f303fd..afa07c93851df 100644
--- a/mlir/test/Target/LLVMIR/allocatable_gpu_reduction_teams.mlir
+++ b/mlir/test/Target/LLVMIR/allocatable_gpu_reduction_teams.mlir
@@ -45,7 +45,8 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<"dlti.alloca_memory_space" = 5 :
%5 = llvm.addrspacecast %4 : !llvm.ptr<5> to !llvm.ptr
%8 = llvm.getelementptr %5[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
%9 = omp.map.info var_ptr(%5 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%8 : !llvm.ptr, f32) -> !llvm.ptr {name = ""}
- %10 = omp.map.info var_ptr(%5 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(always, descriptor, to, attach) capture(ByRef) members(%9 : [0] : !llvm.ptr) -> !llvm.ptr {name = "scalar_alloc"}
+ %10 = omp.map.info var_ptr(%5 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(always, descriptor, to) capture(ByRef) members(%9 : [0] : !llvm.ptr) -> !llvm.ptr {name = "scalar_alloc"}
+ %attach = omp.map.info var_ptr(%5 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(ref_ptr_ptee, attach) capture(ByRef) var_ptr_ptr(%8 : !llvm.ptr, f32) -> !llvm.ptr {name = "scalar_alloc"}
omp.target map_entries(%10 -> %arg0 : !llvm.ptr) {
%14 = llvm.mlir.constant(1000000 : i32) : i32
%15 = llvm.mlir.constant(1 : i32) : i32
diff --git a/mlir/test/Target/LLVMIR/omptarget-data-use-dev-ordering.mlir b/mlir/test/Target/LLVMIR/omptarget-data-use-dev-ordering.mlir
index ab59b597846bb..e35a3228ef4f2 100644
--- a/mlir/test/Target/LLVMIR/omptarget-data-use-dev-ordering.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-data-use-dev-ordering.mlir
@@ -15,7 +15,7 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
%5 = omp.map.info var_ptr(%arg1 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
%6 = omp.map.info var_ptr(%arg2 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%arg3 : !llvm.ptr, i32) bounds(%3) -> !llvm.ptr
%7 = omp.map.info var_ptr(%arg2 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) members(%6 : [0] : !llvm.ptr) -> !llvm.ptr
- %8 = omp.map.info var_ptr(%arg4 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%arg5 : !llvm.ptr, i32) -> !llvm.ptr
+ %8 = omp.map.info var_ptr(%arg4 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%arg5 : !llvm.ptr, f32) -> !llvm.ptr
%9 = omp.map.info var_ptr(%arg4 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) members(%8 : [0] : !llvm.ptr) -> !llvm.ptr
%10 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(i64)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
omp.target_data map_entries(%4, %5 : !llvm.ptr, !llvm.ptr) use_device_addr(%7 -> %arg6, %9 -> %arg7, %6 -> %arg8, %8 -> %arg9 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) use_device_ptr(%10 -> %arg10 : !llvm.ptr) {
@@ -67,12 +67,11 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
// CHECK: define void @mix_use_device_ptr_and_addr_and_map_(ptr %[[ARG_0:.*]], ptr %[[ARG_1:.*]], ptr %[[ARG_2:.*]], ptr %[[ARG_3:.*]], ptr %[[ARG_4:.*]], ptr %[[ARG_5:.*]], ptr %[[ARG_6:.*]], ptr %[[ARG_7:.*]]) {
// CHECK: %[[ALLOCA:.*]] = alloca ptr, align 8
-// CHECK: %[[BASEPTR_0_GEP:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK: %[[BASEPTR_0_GEP:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
// CHECK: store ptr %[[ARG_0]], ptr %[[BASEPTR_0_GEP]], align 8
-// CHECK: %[[BASEPTR_2_GEP:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 4
+// CHECK: %[[BASEPTR_2_GEP:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 5
// CHECK: store ptr %[[ARG_2]], ptr %[[BASEPTR_2_GEP]], align 8
-// CHECK: %[[BASEPTR_3_GEP:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 9
-// CHECK: store ptr %[[ARG_4]], ptr %[[BASEPTR_3_GEP]], align 8
+// CHECK: %[[BASEPTR_3_GEP:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 9
// CHECK: call void @__tgt_target_data_begin_mapper({{.*}})
// CHECK: %[[LOAD_BASEPTR_0:.*]] = load ptr, ptr %[[BASEPTR_0_GEP]], align 8
@@ -93,11 +92,11 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
// CHECK: define void @mix_use_device_ptr_and_addr_and_map_2(ptr %[[ARG_0:.*]], ptr %[[ARG_1:.*]], ptr %[[ARG_2:.*]], ptr %[[ARG_3:.*]], ptr %[[ARG_4:.*]], ptr %[[ARG_5:.*]], ptr %[[ARG_6:.*]], ptr %[[ARG_7:.*]]) {
// CHECK: %[[ALLOCA:.*]] = alloca ptr, align 8
-// CHECK: %[[BASEPTR_1_GEP:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK: %[[BASEPTR_1_GEP:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
// CHECK: store ptr %[[ARG_0]], ptr %[[BASEPTR_1_GEP]], align 8
-// CHECK: %[[BASEPTR_2_GEP:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 4
+// CHECK: %[[BASEPTR_2_GEP:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 5
// CHECK: store ptr %[[ARG_2]], ptr %[[BASEPTR_2_GEP]], align 8
-// CHECK: %[[BASEPTR_3_GEP:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 9
+// CHECK: %[[BASEPTR_3_GEP:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 9
// CHECK: store ptr %[[ARG_4]], ptr %[[BASEPTR_3_GEP]], align 8
// CHECK: call void @__tgt_target_data_begin_mapper({{.*}})
// CHECK: %[[LOAD_BASEPTR_1:.*]] = load ptr, ptr %[[BASEPTR_1_GEP]], align 8
diff --git a/mlir/test/Target/LLVMIR/omptarget-host-ref-semantics.mlir b/mlir/test/Target/LLVMIR/omptarget-host-ref-semantics.mlir
new file mode 100644
index 0000000000000..aeb42615a6c8c
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-host-ref-semantics.mlir
@@ -0,0 +1,364 @@
+
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// Tests that we correctly lower the different variations of reference pointer
+// and attach semantics.
+
+module attributes {omp.is_gpu = false, omp.is_target_device = false, omp.requires = #omp<clause_requires none>, omp.target_triples = ["amdgcn-amd-amdhsa"], omp.version = #omp.version<version = 61>} {
+ llvm.func @attach_always_(%arg0: !llvm.ptr, %arg1: !llvm.ptr) {
+ %map1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%arg1 : !llvm.ptr, i32) -> !llvm.ptr {name = ""}
+ %map2 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(always, to) capture(ByRef) members(%map1 : [0] : !llvm.ptr) -> !llvm.ptr {name = "x"}
+ %map3 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(always, attach, ref_ptr_ptee) capture(ByRef) var_ptr_ptr(%arg1 : !llvm.ptr, i32) -> !llvm.ptr {name = "x"}
+ omp.target map_entries(%map2 -> %arg2, %map3 -> %arg3, %map1 -> %arg4 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+ omp.terminator
+ }
+ llvm.return
+ }
+
+ llvm.func @attach_never_(%arg0: !llvm.ptr, %arg1: !llvm.ptr) {
+ %map1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%arg1 : !llvm.ptr, i32) -> !llvm.ptr {name = ""}
+ %map2 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(always, to) capture(ByRef) members(%map1 : [0] : !llvm.ptr) -> !llvm.ptr {name = "x"}
+ omp.target map_entries(%map2 -> %arg2, %map1 -> %arg3 : !llvm.ptr, !llvm.ptr) {
+ omp.terminator
+ }
+ llvm.return
+ }
+
+ llvm.func @attach_auto_(%arg0: !llvm.ptr, %arg1: !llvm.ptr) {
+ %map1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%arg1 : !llvm.ptr, i32) -> !llvm.ptr {name = ""}
+ %map2 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(always, to) capture(ByRef) members(%map1 : [0] : !llvm.ptr) -> !llvm.ptr {name = "x"}
+ %map3 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(attach, ref_ptr_ptee) capture(ByRef) var_ptr_ptr(%arg1 : !llvm.ptr, i32) -> !llvm.ptr {name = "x"}
+ omp.target map_entries(%map2 -> %arg2, %map3 -> %arg3, %map1 -> %arg4 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+ omp.terminator
+ }
+ llvm.return
+ }
+
+ llvm.func @ref_ptr_ptee_(%arg0: !llvm.ptr, %arg1: !llvm.ptr) {
+ %map1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(to, ref_ptr_ptee) capture(ByRef) var_ptr_ptr(%arg1 : !llvm.ptr, i32) -> !llvm.ptr {name = ""}
+ %map2 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(to, ref_ptr_ptee) capture(ByRef) members(%map1 : [0] : !llvm.ptr) -> !llvm.ptr {name = "x"}
+ %map3 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(attach, ref_ptr_ptee) capture(ByRef) var_ptr_ptr(%arg1 : !llvm.ptr, i32) -> !llvm.ptr {name = "x"}
+ omp.target map_entries(%map2 -> %arg2, %map3 -> %arg3, %map1 -> %arg4 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+ omp.terminator
+ }
+ llvm.return
+ }
+
+ llvm.func @ref_ptr_(%arg0: !llvm.ptr, %arg1: !llvm.ptr) {
+ %map1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(to, ref_ptr) capture(ByRef) -> !llvm.ptr {name = ""}
+ %map2 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(attach, ref_ptr) capture(ByRef) var_ptr_ptr(%arg1 : !llvm.ptr, i32) -> !llvm.ptr {name = "x"}
+ omp.target map_entries(%map2 -> %arg2, %map1 -> %arg3 : !llvm.ptr, !llvm.ptr) {
+ omp.terminator
+ }
+ llvm.return
+ }
+
+ llvm.func @ref_ptee_(%arg0: !llvm.ptr, %arg1: !llvm.ptr) {
+ %map1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(to, ref_ptee) capture(ByRef) var_ptr_ptr(%arg1 : !llvm.ptr, i32) -> !llvm.ptr {name = ""}
+ %map2 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(attach, ref_ptee) capture(ByRef) var_ptr_ptr(%arg1 : !llvm.ptr, i32) -> !llvm.ptr {name = "x"}
+ omp.target map_entries(%map2 -> %arg2, %map1 -> %arg3 : !llvm.ptr, !llvm.ptr) {
+ omp.terminator
+ }
+ llvm.return
+ }
+
+ llvm.func @ref_ptr_ptee_attach_never_(%arg0: !llvm.ptr, %arg1: !llvm.ptr) {
+ %map1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(to, ref_ptr_ptee) capture(ByRef) var_ptr_ptr(%arg1 : !llvm.ptr, i32) -> !llvm.ptr {name = ""}
+ %map2 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(to, ref_ptr_ptee) capture(ByRef) members(%map1 : [0] : !llvm.ptr) -> !llvm.ptr {name = "x"}
+ omp.target map_entries(%map2 -> %arg2, %map1 -> %arg3 : !llvm.ptr, !llvm.ptr) {
+ omp.terminator
+ }
+ llvm.return
+ }
+}
+
+// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976710661, i64 281474976710661, i64 3, i64 16388]
+// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710661, i64 281474976710661, i64 3]
+// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976710661, i64 281474976710661, i64 3, i64 16384]
+// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976710657, i64 281474976710657, i64 1, i64 16384]
+// CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [2 x i64] [i64 0, i64 24]
+// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [2 x i64] [i64 16384, i64 33]
+// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [2 x i64] [i64 16384, i64 33]
+// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710657, i64 281474976710657, i64 1]
+
+// CHECK: define void @attach_always_(ptr %[[ARG0:.*]], ptr %[[ARG1:.*]])
+// CHECK: %[[VAL_0:.*]] = load ptr, ptr %[[ARG1]], align 8
+// CHECK: %[[VAL_1:.*]] = load ptr, ptr %[[ARG1]], align 8
+// CHECK: %[[VAL_2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[ARG0]], i32 1
+// CHECK: %[[VAL_3:.*]] = ptrtoint ptr %[[VAL_2]] to i64
+// CHECK: %[[VAL_4:.*]] = ptrtoint ptr %[[ARG0]] to i64
+// CHECK: %[[VAL_5:.*]] = sub i64 %[[VAL_3]], %[[VAL_4]]
+// CHECK: %[[VAL_6:.*]] = sdiv exact i64 %[[VAL_5]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK: %[[VAL_7:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[ARG0]], i32 1
+// CHECK: %[[VAL_8:.*]] = ptrtoint ptr %[[ARG1]] to i64
+// CHECK: %[[VAL_9:.*]] = ptrtoint ptr %[[ARG0]] to i64
+// CHECK: %[[VAL_10:.*]] = sub i64 %[[VAL_8]], %[[VAL_9]]
+// CHECK: %[[VAL_11:.*]] = sdiv exact i64 %[[VAL_10]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK: %[[VAL_12:.*]] = getelementptr ptr, ptr %[[ARG1]], i32 1
+// CHECK: %[[VAL_13:.*]] = ptrtoint ptr %[[VAL_7]] to i64
+// CHECK: %[[VAL_14:.*]] = ptrtoint ptr %[[VAL_12]] to i64
+// CHECK: %[[VAL_15:.*]] = sub i64 %[[VAL_13]], %[[VAL_14]]
+// CHECK: %[[VAL_16:.*]] = sdiv exact i64 %[[VAL_15]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK: %[[VAL_17:.*]] = icmp eq ptr %[[VAL_1]], null
+// CHECK: %[[VAL_18:.*]] = select i1 %[[VAL_17]], i64 0, i64 4
+// CHECK: %[[VAL_19:.*]] = icmp eq ptr %[[VAL_0]], null
+// CHECK: %[[VAL_20:.*]] = select i1 %[[VAL_19]], i64 0, i64 24
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK: store ptr %[[ARG0]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK: store i64 %[[VAL_6]], ptr %[[SIZES]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK: store ptr %[[ARG0]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 1
+// CHECK: store i64 %[[VAL_11]], ptr %[[SIZES]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 2
+// CHECK: store ptr %[[VAL_12]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 2
+// CHECK: store i64 %[[VAL_16]], ptr %[[SIZES]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 3
+// CHECK: store ptr %[[VAL_1]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 3
+// CHECK: store i64 %[[VAL_18]], ptr %[[SIZES]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 4
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 4
+// CHECK: store ptr %[[VAL_0]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 4
+// CHECK: store i64 %[[VAL_20]], ptr %[[SIZES]], align 8
+
+// CHECK: define void @attach_never_(ptr %[[ARG0:.*]], ptr %[[ARG1:.*]])
+// CHECK: %[[VAL_1:.*]] = load ptr, ptr %[[ARG1]], align 8
+// CHECK: %[[VAL_2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[ARG0]], i32 1
+// CHECK: %[[VAL_3:.*]] = ptrtoint ptr %[[VAL_2]] to i64
+// CHECK: %[[VAL_4:.*]] = ptrtoint ptr %[[ARG0]] to i64
+// CHECK: %[[VAL_5:.*]] = sub i64 %[[VAL_3]], %[[VAL_4]]
+// CHECK: %[[VAL_6:.*]] = sdiv exact i64 %[[VAL_5]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK: %[[VAL_7:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[ARG0]], i32 1
+// CHECK: %[[VAL_8:.*]] = ptrtoint ptr %[[ARG1]] to i64
+// CHECK: %[[VAL_9:.*]] = ptrtoint ptr %[[ARG0]] to i64
+// CHECK: %[[VAL_10:.*]] = sub i64 %[[VAL_8]], %[[VAL_9]]
+// CHECK: %[[VAL_11:.*]] = sdiv exact i64 %[[VAL_10]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK: %[[VAL_12:.*]] = getelementptr ptr, ptr %[[ARG1]], i32 1
+// CHECK: %[[VAL_13:.*]] = ptrtoint ptr %[[VAL_7]] to i64
+// CHECK: %[[VAL_14:.*]] = ptrtoint ptr %[[VAL_12]] to i64
+// CHECK: %[[VAL_15:.*]] = sub i64 %[[VAL_13]], %[[VAL_14]]
+// CHECK: %[[VAL_16:.*]] = sdiv exact i64 %[[VAL_15]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK: %[[VAL_17:.*]] = icmp eq ptr %[[VAL_1]], null
+// CHECK: %[[VAL_18:.*]] = select i1 %[[VAL_17]], i64 0, i64 4
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK: store ptr %[[ARG0]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK: store i64 %[[VAL_6]], ptr %[[SIZES]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK: store ptr %[[ARG0]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 1
+// CHECK: store i64 %[[VAL_11]], ptr %[[SIZES]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 2
+// CHECK: store ptr %[[VAL_12]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 2
+// CHECK: store i64 %[[VAL_16]], ptr %[[SIZES]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 3
+// CHECK: store ptr %[[VAL_1]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 3
+// CHECK: store i64 %[[VAL_18]], ptr %[[SIZES]], align 8
+
+// CHECK: define void @attach_auto_(ptr %[[ARG0:.*]], ptr %[[ARG1:.*]])
+// CHECK: %[[VAL_0:.*]] = load ptr, ptr %[[ARG1]], align 8
+// CHECK: %[[VAL_1:.*]] = load ptr, ptr %[[ARG1]], align 8
+// CHECK: %[[VAL_2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[ARG0]], i32 1
+// CHECK: %[[VAL_3:.*]] = ptrtoint ptr %[[VAL_2]] to i64
+// CHECK: %[[VAL_4:.*]] = ptrtoint ptr %[[ARG0]] to i64
+// CHECK: %[[VAL_5:.*]] = sub i64 %[[VAL_3]], %[[VAL_4]]
+// CHECK: %[[VAL_6:.*]] = sdiv exact i64 %[[VAL_5]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK: %[[VAL_7:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[ARG0]], i32 1
+// CHECK: %[[VAL_8:.*]] = ptrtoint ptr %[[ARG1]] to i64
+// CHECK: %[[VAL_9:.*]] = ptrtoint ptr %[[ARG0]] to i64
+// CHECK: %[[VAL_10:.*]] = sub i64 %[[VAL_8]], %[[VAL_9]]
+// CHECK: %[[VAL_11:.*]] = sdiv exact i64 %[[VAL_10]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK: %[[VAL_12:.*]] = getelementptr ptr, ptr %[[ARG1]], i32 1
+// CHECK: %[[VAL_13:.*]] = ptrtoint ptr %[[VAL_7]] to i64
+// CHECK: %[[VAL_14:.*]] = ptrtoint ptr %[[VAL_12]] to i64
+// CHECK: %[[VAL_15:.*]] = sub i64 %[[VAL_13]], %[[VAL_14]]
+// CHECK: %[[VAL_16:.*]] = sdiv exact i64 %[[VAL_15]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK: %[[VAL_17:.*]] = icmp eq ptr %[[VAL_1]], null
+// CHECK: %[[VAL_18:.*]] = select i1 %[[VAL_17]], i64 0, i64 4
+// CHECK: %[[VAL_19:.*]] = icmp eq ptr %[[VAL_0]], null
+// CHECK: %[[VAL_20:.*]] = select i1 %[[VAL_19]], i64 0, i64 24
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK: store ptr %[[ARG0]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK: store i64 %[[VAL_6]], ptr %[[SIZES]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK: store ptr %[[ARG0]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 1
+// CHECK: store i64 %[[VAL_11]], ptr %[[SIZES]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 2
+// CHECK: store ptr %[[VAL_12]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 2
+// CHECK: store i64 %[[VAL_16]], ptr %[[SIZES]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 3
+// CHECK: store ptr %[[VAL_1]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 3
+// CHECK: store i64 %[[VAL_18]], ptr %[[SIZES]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 4
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 4
+// CHECK: store ptr %[[VAL_0]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 4
+// CHECK: store i64 %[[VAL_20]], ptr %[[SIZES]], align 8
+
+// CHECK: define void @ref_ptr_ptee_(ptr %[[ARG0:.*]], ptr %[[ARG1:.*]])
+// CHECK: %[[VAL_0:.*]] = load ptr, ptr %[[ARG1]], align 8
+// CHECK: %[[VAL_1:.*]] = load ptr, ptr %[[ARG1]], align 8
+// CHECK: %[[VAL_2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[ARG0]], i32 1
+// CHECK: %[[VAL_3:.*]] = ptrtoint ptr %[[VAL_2]] to i64
+// CHECK: %[[VAL_4:.*]] = ptrtoint ptr %[[ARG0]] to i64
+// CHECK: %[[VAL_5:.*]] = sub i64 %[[VAL_3]], %[[VAL_4]]
+// CHECK: %[[VAL_6:.*]] = sdiv exact i64 %[[VAL_5]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK: %[[VAL_7:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[ARG0]], i32 1
+// CHECK: %[[VAL_8:.*]] = ptrtoint ptr %[[ARG1]] to i64
+// CHECK: %[[VAL_9:.*]] = ptrtoint ptr %[[ARG0]] to i64
+// CHECK: %[[VAL_10:.*]] = sub i64 %[[VAL_8]], %[[VAL_9]]
+// CHECK: %[[VAL_11:.*]] = sdiv exact i64 %[[VAL_10]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK: %[[VAL_12:.*]] = getelementptr ptr, ptr %[[ARG1]], i32 1
+// CHECK: %[[VAL_13:.*]] = ptrtoint ptr %[[VAL_7]] to i64
+// CHECK: %[[VAL_14:.*]] = ptrtoint ptr %[[VAL_12]] to i64
+// CHECK: %[[VAL_15:.*]] = sub i64 %[[VAL_13]], %[[VAL_14]]
+// CHECK: %[[VAL_16:.*]] = sdiv exact i64 %[[VAL_15]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK: %[[VAL_17:.*]] = icmp eq ptr %[[VAL_1]], null
+// CHECK: %[[VAL_18:.*]] = select i1 %[[VAL_17]], i64 0, i64 4
+// CHECK: %[[VAL_19:.*]] = icmp eq ptr %[[VAL_0]], null
+// CHECK: %[[VAL_20:.*]] = select i1 %[[VAL_19]], i64 0, i64 24
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK: store ptr %[[ARG0]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK: store i64 %[[VAL_6]], ptr %[[SIZES]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK: store ptr %[[ARG0]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 1
+// CHECK: store i64 %[[VAL_11]], ptr %[[SIZES]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 2
+// CHECK: store ptr %[[VAL_12]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 2
+// CHECK: store i64 %[[VAL_16]], ptr %[[SIZES]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
+// CHECK: store ptr %[[ARG1]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 3
+// CHECK: store ptr %[[VAL_1]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 3
+// CHECK: store i64 %[[VAL_18]], ptr %[[SIZES]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_baseptrs, i32 0, i32 4
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [5 x ptr], ptr %.offload_ptrs, i32 0, i32 4
+// CHECK: store ptr %[[VAL_0]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [5 x i64], ptr %.offload_sizes, i32 0, i32 4
+// CHECK: store i64 %[[VAL_20]], ptr %[[SIZES]], align 8
+
+// CHECK: define void @ref_ptr_(ptr %[[ARG0:.*]], ptr %[[ARG1:.*]])
+// CHECK: %[[VAL_0:.*]] = load ptr, ptr %[[ARG1]], align 8
+// CHECK: %[[VAL_1:.*]] = icmp eq ptr %[[VAL_0]], null
+// CHECK: %[[VAL_2:.*]] = select i1 %[[VAL_1]], i64 0, i64 24
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK: store ptr %[[VAL_0]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [2 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK: store i64 %[[VAL_2]], ptr %[[SIZES]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK: store ptr %[[ARG0]], ptr %[[OFFPTRS]], align 8
+
+// CHECK: define void @ref_ptee_(ptr %[[ARG0:.*]], ptr %[[ARG1:.*]])
+// CHECK: %[[VAL_0:.*]] = load ptr, ptr %[[ARG1]], align 8
+// CHECK: %[[VAL_1:.*]] = load ptr, ptr %[[ARG1]], align 8
+// CHECK: %[[VAL_2:.*]] = icmp eq ptr %[[VAL_0]], null
+// CHECK: %[[VAL_3:.*]] = select i1 %[[VAL_2]], i64 0, i64 24
+// CHECK: %[[VAL_4:.*]] = icmp eq ptr %[[VAL_1]], null
+// CHECK: %[[VAL_5:.*]] = select i1 %[[VAL_4]], i64 0, i64 4
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK: store ptr %[[VAL_0]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [2 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK: store i64 %[[VAL_3]], ptr %[[SIZES]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK: store ptr %[[ARG1]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK: store ptr %[[VAL_1]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [2 x i64], ptr %.offload_sizes, i32 0, i32 1
+// CHECK: store i64 %[[VAL_5]], ptr %[[SIZES]], align 8
+
+// CHECK: define void @ref_ptr_ptee_attach_never_(ptr %[[ARG0:.*]], ptr %[[ARG1:.*]])
+// CHECK: %[[VAL_1:.*]] = load ptr, ptr %[[ARG1]], align 8
+// CHECK: %[[VAL_2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[ARG0]], i32 1
+// CHECK: %[[VAL_3:.*]] = ptrtoint ptr %[[VAL_2]] to i64
+// CHECK: %[[VAL_4:.*]] = ptrtoint ptr %[[ARG0]] to i64
+// CHECK: %[[VAL_5:.*]] = sub i64 %[[VAL_3]], %[[VAL_4]]
+// CHECK: %[[VAL_6:.*]] = sdiv exact i64 %[[VAL_5]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK: %[[VAL_7:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[ARG0]], i32 1
+// CHECK: %[[VAL_8:.*]] = ptrtoint ptr %[[ARG1]] to i64
+// CHECK: %[[VAL_9:.*]] = ptrtoint ptr %[[ARG0]] to i64
+// CHECK: %[[VAL_10:.*]] = sub i64 %[[VAL_8]], %[[VAL_9]]
+// CHECK: %[[VAL_11:.*]] = sdiv exact i64 %[[VAL_10]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK: %[[VAL_12:.*]] = getelementptr ptr, ptr %[[ARG1]], i32 1
+// CHECK: %[[VAL_13:.*]] = ptrtoint ptr %[[VAL_7]] to i64
+// CHECK: %[[VAL_14:.*]] = ptrtoint ptr %[[VAL_12]] to i64
+// CHECK: %[[VAL_15:.*]] = sub i64 %[[VAL_13]], %[[VAL_14]]
+// CHECK: %[[VAL_16:.*]] = sdiv exact i64 %[[VAL_15]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK: %[[VAL_17:.*]] = icmp eq ptr %[[VAL_1]], null
+// CHECK: %[[VAL_18:.*]] = select i1 %[[VAL_17]], i64 0, i64 4
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK: store ptr %[[ARG0]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK: store i64 %[[VAL_6]], ptr %[[SIZES]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK: store ptr %[[ARG0]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 1
+// CHECK: store i64 %[[VAL_11]], ptr %[[SIZES]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
+// CHECK: store ptr %[[ARG0]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 2
+// CHECK: store ptr %[[VAL_12]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 2
+// CHECK: store i64 %[[VAL_16]], ptr %[[SIZES]], align 8
+// CHECK: %[[BASEPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
+// CHECK: store ptr %[[ARG1]], ptr %[[BASEPTRS]], align 8
+// CHECK: %[[OFFPTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 3
+// CHECK: store ptr %[[VAL_1]], ptr %[[OFFPTRS]], align 8
+// CHECK: %[[SIZES:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 3
+// CHECK: store i64 %[[VAL_18]], ptr %[[SIZES]], align 8
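
For reviewers decoding the .offload_maptypes constants above, here is a small hedged helper. The bit values are assumptions inferred from the constants themselves (e.g. 16388 = ATTACH | ALWAYS, 32 = TARGET_PARAM) rather than quoted from the headers, and MEMBER_OF is taken to be a 1-based parent index stored in the top 16 bits, which is consistent with these tests.

#include <cstdint>
#include <cstdio>

// Assumed bit values; authoritative definitions live in
// llvm::omp::OpenMPOffloadMappingFlags.
constexpr uint64_t kTo = 0x1, kFrom = 0x2, kAlways = 0x4,
                   kTargetParam = 0x20, kAttach = 0x4000;

static void decode(uint64_t mt) {
  std::printf("%llu:", (unsigned long long)mt);
  if (mt & kTo)          std::printf(" TO");
  if (mt & kFrom)        std::printf(" FROM");
  if (mt & kAlways)      std::printf(" ALWAYS");
  if (mt & kTargetParam) std::printf(" TARGET_PARAM");
  if (mt & kAttach)      std::printf(" ATTACH");
  // Assumption: the top 16 bits hold MEMBER_OF as 1-based parent position.
  if (uint64_t memberOf = mt >> 48)
    std::printf(" MEMBER_OF(%llu)", (unsigned long long)(memberOf - 1));
  std::printf("\n");
}

int main() {
  // The @attach_always_ entries above: parent TARGET_PARAM map, two member
  // maps linked to entry 0, the tofrom descriptor base, and the attach entry.
  for (uint64_t mt : {32ull, 281474976710661ull, 3ull, 16388ull})
    decode(mt);
}
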
diff --git a/mlir/test/Target/LLVMIR/omptarget-nested-ptr-record-type-mapping-host.mlir b/mlir/test/Target/LLVMIR/omptarget-nested-ptr-record-type-mapping-host.mlir
index 4912cf34072fb..7b3eb5ce24257 100644
--- a/mlir/test/Target/LLVMIR/omptarget-nested-ptr-record-type-mapping-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-nested-ptr-record-type-mapping-host.mlir
@@ -27,8 +27,8 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
}
}
-// CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [4 x i64] [i64 0, i64 48, i64 8, i64 0]
-// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710659, i64 281474976710659, i64 281474976710675]
+// CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [3 x i64] [i64 0, i64 48, i64 0]
+// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [3 x i64] [i64 32, i64 281474976710659, i64 3]
// CHECK: define void @omp_nested_derived_type_alloca_map(ptr %[[ARG:.*]]) {
@@ -43,24 +43,19 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_4:.*]] = sub i64 %[[DTYPE_SIZE_SEGMENT_CALC_2]], %[[DTYPE_SIZE_SEGMENT_CALC_3]]
// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_5:.*]] = sdiv exact i64 %[[DTYPE_SIZE_SEGMENT_CALC_4]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8
-// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_ptrs, i32 0, i32 0
// CHECK: store ptr %[[NESTED_STRUCT_PTR_MEMBER_GEP]], ptr %[[OFFLOAD_PTRS]], align 8
-// CHECK: %[[OFFLOAD_SIZES:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK: %[[OFFLOAD_SIZES:.*]] = getelementptr inbounds [3 x i64], ptr %.offload_sizes, i32 0, i32 0
// CHECK: store i64 %[[DTYPE_SIZE_SEGMENT_CALC_5]], ptr %[[OFFLOAD_SIZES]], align 8
-// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8
-// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_ptrs, i32 0, i32 1
// CHECK: store ptr %[[NESTED_STRUCT_PTR_MEMBER_GEP]], ptr %[[OFFLOAD_PTRS]], align 8
-// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
+// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8
-// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 2
-// CHECK: store ptr %[[NESTED_STRUCT_PTR_MEMBER_BADDR_GEP]], ptr %[[OFFLOAD_PTRS]], align 8
-
-// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
-// CHECK: store ptr %[[NESTED_STRUCT_PTR_MEMBER_BADDR_GEP]], ptr %[[BASE_PTRS]], align 8
-// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 3
+// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_ptrs, i32 0, i32 2
// CHECK: store ptr %[[ARR_OFFSET]], ptr %[[OFFLOAD_PTRS]], align 8
diff --git a/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir b/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir
index 8837b42f70a44..b1eac23230a50 100644
--- a/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir
@@ -41,14 +41,14 @@ llvm.func @_QQmain() {
// CHECK: %[[LAST_MEMBER:.*]] = getelementptr inbounds [10 x i32], ptr %[[MEMBER_ACCESS_3]], i64 0, i64 1
// CHECK: %[[FIRST_MEMBER:.*]] = getelementptr i32, ptr %[[MEMBER_ACCESS_1]], i64 1
// CHECK: %[[FIRST_MEMBER_OFF:.*]] = ptrtoint ptr %[[FIRST_MEMBER]] to i64
-// CHECK: %[[SECOND_MEMBER_OFF:.*]] = ptrtoint ptr %[[LAST_MEMBER]] to i64
+// CHECK: %[[SECOND_MEMBER_OFF:.*]] = ptrtoint ptr %[[MEMBER_ACCESS_3]] to i64
// CHECK: %[[MEMBER_DIFF:.*]] = sub i64 %[[FIRST_MEMBER_OFF]], %[[SECOND_MEMBER_OFF]]
// CHECK: %[[OFFLOAD_SIZE:.*]] = sdiv exact i64 %[[MEMBER_DIFF]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
// CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
// CHECK: store ptr %[[ALLOCA]], ptr %[[BASE_PTR_ARR]], align 8
// CHECK: %[[PTR_ARR:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 0
-// CHECK: store ptr %[[LAST_MEMBER]], ptr %[[PTR_ARR]], align 8
+// CHECK: store ptr %[[MEMBER_ACCESS_3]], ptr %[[PTR_ARR]], align 8
// CHECK: %[[SIZE_ARR:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 0
// CHECK: store i64 %[[OFFLOAD_SIZE]], ptr %[[SIZE_ARR]], align 8
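
The CHECK changes above reflect the new parent-span computation in mapParentWithMembers: both endpoints of the size calculation now come from the members' base addresses rather than the bounds-adjusted Pointers entries. A simplified standalone model follows; the record layout and names are illustrative only, not the Fortran type used in this test.

#include <cstdint>

// Illustrative record: 'first' and 'last' are the mapped members; 'last'
// might be accessed with a non-zero lower bound, so its Pointers entry would
// be offset, but the span below deliberately uses the member bases.
struct RecordSketch {
  int first;
  double unusedPadding;
  int last[10];
};

// Span from the first mapped member's base to one element past the last
// mapped member's full type, mirroring lowAddr/highAddr above.
static uint64_t parentSpanBytes(const RecordSketch &r) {
  uintptr_t low = reinterpret_cast<uintptr_t>(&r.first);
  uintptr_t high = reinterpret_cast<uintptr_t>(&r.last + 1);
  return static_cast<uint64_t>(high - low);
}
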
diff --git a/mlir/test/Target/LLVMIR/omptarget-nowait.mlir b/mlir/test/Target/LLVMIR/omptarget-nowait.mlir
index 39b2f37a031a8..f61c03e5a53e9 100644
--- a/mlir/test/Target/LLVMIR/omptarget-nowait.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-nowait.mlir
@@ -25,33 +25,33 @@ module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} {
// CHECK: %struct.[[TSK_WTH_PRVTS:.*]] = type { %struct.kmp_task_ompbuilder_t, %struct.[[PRVTS:.*]] }
// CHECK: %struct.kmp_task_ompbuilder_t = type { ptr, ptr, i32, ptr, ptr }
-// CHECK: %struct.[[PRVTS]] = type { [6 x ptr], [6 x ptr], [6 x i64] }
+// CHECK: %struct.[[PRVTS]] = type { [5 x ptr], [5 x ptr], [5 x i64] }
// CHECK: define void @launch_(ptr captures(none) %0)
// CHECK: %[[STRUCTARG:.*]] = alloca { ptr, ptr }, align 8
-// CHECK: %[[BASEPTRS:.*]] = alloca [6 x ptr], align 8
-// CHECK: %[[PTRS:.*]] = alloca [6 x ptr], align 8
-// CHECK: %[[MAPPERS:.*]] = alloca [6 x ptr], align 8
-// CHECK: %[[SIZES:.*]] = alloca [6 x i64], align 4
+// CHECK: %[[BASEPTRS:.*]] = alloca [5 x ptr], align 8
+// CHECK: %[[PTRS:.*]] = alloca [5 x ptr], align 8
+// CHECK: %[[MAPPERS:.*]] = alloca [5 x ptr], align 8
+// CHECK: %[[SIZES:.*]] = alloca [5 x i64], align 4
-// CHECK: %[[VAL_20:.*]] = getelementptr inbounds [6 x ptr], ptr %[[BASEPTRS]], i32 0, i32 0
-// CHECK: %[[BASEPTRS_GEP:.*]] = getelementptr inbounds [6 x ptr], ptr %[[BASEPTRS]], i32 0, i32 0
-// CHECK: %[[PTRS_GEP:.*]] = getelementptr inbounds [6 x ptr], ptr %[[PTRS]], i32 0, i32 0
-// CHECK: %[[SIZES_GEP:.*]] = getelementptr inbounds [6 x i64], ptr %[[SIZES]], i32 0, i32 0
+// CHECK: %[[VAL_20:.*]] = getelementptr inbounds [5 x ptr], ptr %[[BASEPTRS]], i32 0, i32 0
+// CHECK: %[[BASEPTRS_GEP:.*]] = getelementptr inbounds [5 x ptr], ptr %[[BASEPTRS]], i32 0, i32 0
+// CHECK: %[[PTRS_GEP:.*]] = getelementptr inbounds [5 x ptr], ptr %[[PTRS]], i32 0, i32 0
+// CHECK: %[[SIZES_GEP:.*]] = getelementptr inbounds [5 x i64], ptr %[[SIZES]], i32 0, i32 0
// CHECK: %[[GL_THRD_NUM:.*]] = call i32 @__kmpc_global_thread_num
-// CHECK: %[[TASK_DESC:.*]] = call ptr @__kmpc_omp_target_task_alloc(ptr @4, i32 {{.*}}, i32 0, i64 184, i64 16, ptr [[TGT_TSK_PRXY_FNC:.*]], i64 -1)
+// CHECK: %[[TASK_DESC:.*]] = call ptr @__kmpc_omp_target_task_alloc(ptr @4, i32 {{.*}}, i32 0, i64 160, i64 16, ptr [[TGT_TSK_PRXY_FNC:.*]], i64 -1)
// CHECK: %[[TSK_PTR:.*]] = getelementptr inbounds nuw %struct.[[TSK_WTH_PRVTS]], ptr %[[TASK_DESC]], i32 0, i32 0
// CHECK: %[[SHAREDS:.*]] = getelementptr inbounds nuw %struct.kmp_task_ompbuilder_t, ptr %[[TSK_PTR]], i32 0, i32 0
// CHECK: %[[SHAREDS_PTR:.*]] = load ptr, ptr %[[SHAREDS]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[SHAREDS_PTR]], ptr align 1 %[[STRUCTARG]], i64 16, i1 false)
// CHECK: %[[VAL_50:.*]] = getelementptr inbounds nuw %struct.[[TSK_WTH_PRVTS]], ptr %[[TASK_DESC]], i32 0, i32 1
// CHECK: %[[VAL_51:.*]] = getelementptr inbounds nuw %struct.[[PRVTS]], ptr %[[VAL_50]], i32 0, i32 0
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_51]], ptr align 1 %[[BASEPTRS_GEP]], i64 48, i1 false)
+// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_51]], ptr align 1 %[[BASEPTRS_GEP]], i64 40, i1 false)
// CHECK: %[[VAL_53:.*]] = getelementptr inbounds nuw %struct.[[PRVTS]], ptr %[[VAL_50]], i32 0, i32 1
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_53]], ptr align 1 %[[PTRS_GEP]], i64 48, i1 false)
+// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_53]], ptr align 1 %[[PTRS_GEP]], i64 40, i1 false)
// CHECK: %[[VAL_54:.*]] = getelementptr inbounds nuw %struct.[[PRVTS]], ptr %[[VAL_50]], i32 0, i32 2
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_54]], ptr align 1 %[[SIZES_GEP]], i64 48, i1 false)
+// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_54]], ptr align 1 %[[SIZES_GEP]], i64 40, i1 false)
// CHECK: %[[VAL_55:.*]] = call i32 @__kmpc_omp_task(ptr @4, i32 %[[GL_THRD_NUM]], ptr %[[TASK_DESC]])
// CHECK: define internal void @[[WORKER:.*]](i32 {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}) {
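
A quick arithmetic cross-check of the deltas in this test, under the assumption of 8-byte pointers/i64s and an unchanged task header: dropping one map entry removes 8 bytes from each of the three privates arrays (baseptrs, ptrs, sizes), which accounts for both the 48 -> 40 memcpy sizes and the 184 -> 160 task allocation.

#include <cstdint>

constexpr uint64_t kEntryBytes = 8;               // assumed pointer/i64 size
constexpr uint64_t kOldEntries = 6, kNewEntries = 5;
constexpr uint64_t kOldAlloc = 184;               // old task_alloc size above

static_assert(kNewEntries * kEntryBytes == 40,
              "memcpy size per privates array after the patch");
static_assert(kOldAlloc - 3 * (kOldEntries - kNewEntries) * kEntryBytes == 160,
              "task_alloc size after dropping one map entry");
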
diff --git a/mlir/test/Target/LLVMIR/omptarget-record-type-mapping-host.mlir b/mlir/test/Target/LLVMIR/omptarget-record-type-mapping-host.mlir
index e19b96bb2d732..c5b966f6dc817 100644
--- a/mlir/test/Target/LLVMIR/omptarget-record-type-mapping-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-record-type-mapping-host.mlir
@@ -39,14 +39,14 @@ llvm.func @_QQmain() {
// CHECK: %[[LAST_MEMBER:.*]] = getelementptr inbounds [10 x i32], ptr %[[MEMBER_ACCESS_2]], i64 0, i64 1
// CHECK: %[[FIRST_MEMBER:.*]] = getelementptr i32, ptr %[[MEMBER_ACCESS_1]], i64 1
// CHECK: %[[FIRST_MEMBER_OFF:.*]] = ptrtoint ptr %[[FIRST_MEMBER]] to i64
-// CHECK: %[[SECOND_MEMBER_OFF:.*]] = ptrtoint ptr %[[LAST_MEMBER]] to i64
+// CHECK: %[[SECOND_MEMBER_OFF:.*]] = ptrtoint ptr %[[MEMBER_ACCESS_2]] to i64
// CHECK: %[[MEMBER_DIFF:.*]] = sub i64 %[[FIRST_MEMBER_OFF]], %[[SECOND_MEMBER_OFF]]
// CHECK: %[[OFFLOAD_SIZE:.*]] = sdiv exact i64 %[[MEMBER_DIFF]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
// CHECK: %[[BASE_PTR_ARR:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
// CHECK: store ptr %[[ALLOCA]], ptr %[[BASE_PTR_ARR]], align 8
// CHECK: %[[PTR_ARR:.*]] = getelementptr inbounds [3 x ptr], ptr %.offload_ptrs, i32 0, i32 0
-// CHECK: store ptr %[[LAST_MEMBER]], ptr %[[PTR_ARR]], align 8
+// CHECK: store ptr %[[MEMBER_ACCESS_2]], ptr %[[PTR_ARR]], align 8
// CHECK: %[[SIZE_ARR:.*]] = getelementptr inbounds [3 x i64], ptr %.offload_sizes, i32 0, i32 0
// CHECK: store i64 %[[OFFLOAD_SIZE]], ptr %[[SIZE_ARR]], align 8
diff --git a/mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir b/mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir
index 725b7c169c2df..0f1cfd6f7d74f 100644
--- a/mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir
@@ -59,9 +59,7 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
// CHECK: @[[FULL_ARR_GLOB:.*]] = internal global { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } undef
// CHECK: @[[ARR_SECT_GLOB:.*]] = internal global { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } undef
-// CHECK: @.offload_sizes = private unnamed_addr constant [15 x i64] [i64 0, i64 0, i64 0, i64 8, i64 0, i64 0, i64 0, i64 0, i64 8, i64 0, i64 0, i64 0, i64 0, i64 8, i64 0]
-// CHECK: @.offload_maptypes = private unnamed_addr constant [15 x i64] [i64 32, i64 281474976710659, i64 281474976710659, i64 281474976710659, i64 281474976710675, i64 32, i64 1688849860263939, i64 1688849860263939, i64 1688849860263939, i64 1688849860263955, i64 32, i64 3096224743817219, i64 3096224743817219, i64 3096224743817219, i64 3096224743817235]
-// CHECK: @.offload_mapnames = private constant [15 x ptr] [ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}]
+// CHECK: @.offload_maptypes = private unnamed_addr constant [12 x i64] [i64 32, i64 281474976710659, i64 281474976710659, i64 3, i64 32, i64 1407374883553283, i64 1407374883553283, i64 3, i64 32, i64 2533274790395907, i64 2533274790395907, i64 3]
// CHECK: define void @main()
// CHECK: %[[SCALAR_ALLOCA:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, i64 1, align 8
@@ -103,79 +101,65 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
// CHECK: %[[SCALAR_BASE_OFF_SZ2:.*]] = ptrtoint ptr %[[SCALAR_BASE_OFF]] to i64
// CHECK: %[[SCALAR_BASE_OFF_SZ3:.*]] = sub i64 %[[SCALAR_BASE_OFF_SZ1]], %[[SCALAR_BASE_OFF_SZ2]]
// CHECK: %[[SCALAR_BASE_OFF_SZ4:.*]] = sdiv exact i64 %[[SCALAR_BASE_OFF_SZ3]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
// CHECK: store ptr @full_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 0
// CHECK: store ptr @full_arr, ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [15 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [12 x i64], ptr %.offload_sizes, i32 0, i32 0
// CHECK: store i64 %[[FULL_ARR_DESC_SIZE]], ptr %[[OFFLOADSIZES]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
// CHECK: store ptr @full_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 1
// CHECK: store ptr @full_arr, ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
// CHECK: store ptr @full_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_ptrs, i32 0, i32 2
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 2
// CHECK: store ptr getelementptr inbounds nuw (i8, ptr @full_arr, i64 8), ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [15 x i64], ptr %.offload_sizes, i32 0, i32 2
+// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [12 x i64], ptr %.offload_sizes, i32 0, i32 2
// CHECK: store i64 %[[FULL_ARR_SZ]], ptr %[[OFFLOADSIZES]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
// CHECK: store ptr @full_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_ptrs, i32 0, i32 3
-// CHECK: store ptr @full_arr, ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_baseptrs, i32 0, i32 4
-// CHECK: store ptr @full_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_ptrs, i32 0, i32 4
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 3
// CHECK: store ptr %[[FULL_ARR_PTR]], ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [15 x i64], ptr %.offload_sizes, i32 0, i32 4
-// CHECK: store i64 %[[IS_NULL]], ptr %[[OFFLOADSIZES]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_baseptrs, i32 0, i32 5
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 4
// CHECK: store ptr @sect_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_ptrs, i32 0, i32 5
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 4
// CHECK: store ptr @sect_arr, ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [15 x i64], ptr %.offload_sizes, i32 0, i32 5
+// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [12 x i64], ptr %.offload_sizes, i32 0, i32 4
// CHECK: store i64 %[[ARR_SECT_DESC_SIZE]], ptr %[[OFFLOADSIZES]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_baseptrs, i32 0, i32 6
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 5
// CHECK: store ptr @sect_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_ptrs, i32 0, i32 6
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 5
// CHECK: store ptr @sect_arr, ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_baseptrs, i32 0, i32 7
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 6
// CHECK: store ptr @sect_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_ptrs, i32 0, i32 7
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 6
// CHECK: store ptr getelementptr inbounds nuw (i8, ptr @sect_arr, i64 8), ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [15 x i64], ptr %.offload_sizes, i32 0, i32 7
+// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [12 x i64], ptr %.offload_sizes, i32 0, i32 6
// CHECK: store i64 %[[ARR_SECT_SZ]], ptr %[[OFFLOADSIZES]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_baseptrs, i32 0, i32 8
-// CHECK: store ptr @sect_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_ptrs, i32 0, i32 8
-// CHECK: store ptr @sect_arr, ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_baseptrs, i32 0, i32 9
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 7
// CHECK: store ptr @sect_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_ptrs, i32 0, i32 9
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 7
// CHECK: store ptr %[[ARR_SECT_PTR]], ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [15 x i64], ptr %.offload_sizes, i32 0, i32 9
+// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [12 x i64], ptr %.offload_sizes, i32 0, i32 7
// CHECK: store i64 %[[IS_NULL2]], ptr %[[OFFLOADSIZES]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_baseptrs, i32 0, i32 10
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 8
// CHECK: store ptr %[[SCALAR_ALLOCA]], ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_ptrs, i32 0, i32 10
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 8
// CHECK: store ptr %[[SCALAR_ALLOCA]], ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [15 x i64], ptr %.offload_sizes, i32 0, i32 10
+// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [12 x i64], ptr %.offload_sizes, i32 0, i32 8
// CHECK: store i64 %[[SCALAR_DESC_SZ]], ptr %[[OFFLOADSIZES]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_baseptrs, i32 0, i32 11
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 9
// CHECK: store ptr %[[SCALAR_ALLOCA]], ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_ptrs, i32 0, i32 11
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 9
// CHECK: store ptr %[[SCALAR_ALLOCA]], ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_baseptrs, i32 0, i32 12
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 10
// CHECK: store ptr %[[SCALAR_ALLOCA]], ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_ptrs, i32 0, i32 12
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 10
// CHECK: store ptr %[[SCALAR_BASE_OFF]], ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [15 x i64], ptr %.offload_sizes, i32 0, i32 12
+// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [12 x i64], ptr %.offload_sizes, i32 0, i32 10
// CHECK: store i64 %[[SCALAR_BASE_OFF_SZ4]], ptr %[[OFFLOADSIZES]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_baseptrs, i32 0, i32 13
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 11
// CHECK: store ptr %[[SCALAR_ALLOCA]], ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_ptrs, i32 0, i32 13
-// CHECK: store ptr %[[SCALAR_BASE]], ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_baseptrs, i32 0, i32 14
-// CHECK: store ptr %[[SCALAR_BASE]], ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [15 x ptr], ptr %.offload_ptrs, i32 0, i32 14
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 11
// CHECK: store ptr %[[SCALAR_PTR_LOAD]], ptr %[[OFFLOADPTRS]], align 8
diff --git a/offload/test/offloading/fortran/descriptor-stack-jam-regression.f90 b/offload/test/offloading/fortran/descriptor-stack-jam-regression.f90
index 45a18b7f38ed3..5e33770648e34 100644
--- a/offload/test/offloading/fortran/descriptor-stack-jam-regression.f90
+++ b/offload/test/offloading/fortran/descriptor-stack-jam-regression.f90
@@ -5,7 +5,8 @@
! device.
! REQUIRES: flang, amdgpu
-! RUN: %libomptarget-compile-fortran-run-and-check-generic
+! RUN: %libomptarget-compile-fortran-generic
+! RUN: env LIBOMPTARGET_TREAT_ATTACH_AUTO_AS_ALWAYS=1 %libomptarget-run-generic 2>&1 | %fcheck-generic
module test
contains
subroutine kernel_1d(array)
diff --git a/offload/test/offloading/fortran/map_attach_always.f90 b/offload/test/offloading/fortran/map_attach_always.f90
new file mode 100644
index 0000000000000..f5c7c5e6df921
--- /dev/null
+++ b/offload/test/offloading/fortran/map_attach_always.f90
@@ -0,0 +1,70 @@
+
+! This checks that attach(always) forces pointer attachment.
+! NOTE: We have to make sure the old default auto-attach behaviour is off
+! to yield the correct results for this test; otherwise the second target
+! region would behave as if attach(always) had been specified.
+! REQUIRES: flang, amdgpu
+
+! RUN: %libomptarget-compile-fortran-generic
+! RUN: env LIBOMPTARGET_TREAT_ATTACH_AUTO_AS_ALWAYS=0 %libomptarget-run-generic 2>&1 | %fcheck-generic
+program main
+ implicit none
+ integer, pointer :: map_ptr(:)
+ integer, target :: a(10)
+ integer, target :: b(10)
+ integer :: index, n
+ logical :: correct
+
+ n = 10
+ correct = .true.
+
+ do index = 1, n
+ a(index) = 10
+ b(index) = 20
+ end do
+
+ map_ptr => a
+
+  ! This should map map_ptr, a and b to the device, and attach map_ptr
+  ! to a (as it points at a above); b is therefore already present on
+  ! the device when the target regions below run.
+ !$omp target enter data map(ref_ptr_ptee, to: map_ptr)
+ !$omp target enter data map(to: b, a)
+
+ !$omp target map(to: index) map(tofrom: correct)
+ do index = 1, n
+ if (map_ptr(index) /= 10) then
+ correct = .false.
+ endif
+ end do
+ !$omp end target
+
+ map_ptr => b
+
+  ! No attach(always) to force re-attachment, so map_ptr should still
+  ! be attached to "a".
+ !$omp target map(to: index) map(tofrom: correct)
+ do index = 1, n
+ if (map_ptr(index) /= 10) then
+ correct = .false.
+ endif
+ end do
+ !$omp end target
+
+ !$omp target map(to: index) map(attach(always): map_ptr) map(tofrom: correct)
+ do index = 1, n
+ if (map_ptr(index) /= 20) then
+ correct = .false.
+ endif
+ end do
+ !$omp end target
+
+ if (correct .NEQV. .true.) then
+ print*, "Failed!"
+ stop 1
+ endif
+
+ print*, "Passed!"
+end program
+
+!CHECK: Passed!
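
For reference, a condensed sketch of the reattachment pattern the test above
exercises; the directive spelling assumes this PR's Flang support and a
runtime running with LIBOMPTARGET_TREAT_ATTACH_AUTO_AS_ALWAYS=0:

program attach_always_sketch
  implicit none
  integer, pointer :: p(:)
  integer, target :: x(4), y(4)
  x = 1
  y = 2
  p => x
  ! Map descriptor + pointee and attach p to x on the device.
  !$omp target enter data map(ref_ptr_ptee, to: p)
  !$omp target enter data map(to: y)
  ! Host-side retarget only; the device attachment still refers to x.
  p => y
  ! attach(always) forces re-attachment of p, now to y.
  !$omp target map(attach(always): p)
  p(1) = p(1) + 1
  !$omp end target
  !$omp target exit data map(from: y)
  print *, y(1) ! expected: 3
end program attach_always_sketch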
diff --git a/offload/test/offloading/fortran/map_attach_never.f90 b/offload/test/offloading/fortran/map_attach_never.f90
new file mode 100644
index 0000000000000..5b4d2dc5c6cd8
--- /dev/null
+++ b/offload/test/offloading/fortran/map_attach_never.f90
@@ -0,0 +1,55 @@
+! This checks that attach(never) prevents pointer attachment when specified.
+! NOTE: We have to make sure the old default auto-attach behaviour is off
+! to yield the correct results for this test; otherwise the target region
+! would behave as if attach(always) had been specified.
+! REQUIRES: flang, amdgpu
+
+! RUN: %libomptarget-compile-fortran-generic
+! RUN: env LIBOMPTARGET_TREAT_ATTACH_AUTO_AS_ALWAYS=0 %libomptarget-run-generic 2>&1 | %fcheck-generic
+program main
+ implicit none
+ integer, pointer :: map_ptr(:)
+ integer, target :: a(10)
+ integer, target :: b(10)
+ integer :: index, n
+ logical :: correct
+
+ correct = .true.
+ n = 10
+
+ do index = 1, n
+ a(index) = 10
+ b(index) = 20
+ end do
+
+ map_ptr => a
+
+  ! This should map a and map_ptr to the device, and attach map_ptr
+  ! to a (as it points at a above).
+ !$omp target enter data map(ref_ptr_ptee, to: map_ptr)
+
+ map_ptr => b
+
+ ! As "b" hasn't been mapped to device yet, the first time it's mapped will
+ ! be when map_ptr is re-mapped (implicitly or explicitly), the default behavior
+ ! when LIBOMPTARGET_TREAT_ATTACH_AUTO_AS_ALWAYS is switched off would force attachment
+ ! of map_ptr to b as we've assigned it above. To prevent this and test the never
+ ! attachment, we can apply attach(never), which prevents this reattachment from
+ ! occurring
+ !$omp target map(to: index) map(tofrom: correct) map(attach(never): map_ptr)
+ do index = 1, n
+ if (map_ptr(index) /= 10) then
+ correct = .false.
+ endif
+ end do
+ !$omp end target
+
+ if (correct .NEQV. .true.) then
+ print*, "Failed!"
+ stop 1
+ endif
+
+ print*, "Passed!"
+end program
+
+!CHECK: Passed!
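
Both tests above use the combined ref_ptr_ptee modifier to map the
descriptor and the pointee together; a minimal sketch of that form in
isolation, under the same assumptions as the tests:

program ref_ptr_ptee_sketch
  implicit none
  integer, pointer :: p(:)
  integer, target :: buf(8)
  integer :: i
  p => buf
  ! One directive maps the descriptor (ref_ptr) and the pointee (ref_ptee)
  ! and performs the attachment.
  !$omp target enter data map(ref_ptr_ptee, to: p)
  !$omp target
  do i = 1, 8
    p(i) = i
  end do
  !$omp end target
  !$omp target exit data map(ref_ptr_ptee, from: p)
  print *, buf(8) ! expected: 8
end program ref_ptr_ptee_sketch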
diff --git a/offload/test/offloading/fortran/map_ref_ptr_ptee_test_1.f90 b/offload/test/offloading/fortran/map_ref_ptr_ptee_test_1.f90
new file mode 100644
index 0000000000000..e8ead0a251f97
--- /dev/null
+++ b/offload/test/offloading/fortran/map_ref_ptr_ptee_test_1.f90
@@ -0,0 +1,48 @@
+! This checks that we can specify ref_ptee and ref_ptr without
+! encountering an error and correctly map data to and from the device.
+! REQUIRES: flang, amdgpu
+
+! RUN: %libomptarget-compile-fortran-generic -fopenmp-version=61
+! RUN: env LIBOMPTARGET_TREAT_ATTACH_AUTO_AS_ALWAYS=0 %libomptarget-run-generic 2>&1 | %fcheck-generic
+program main
+ implicit none
+ integer, pointer :: map_ptr(:)
+ integer, target :: b(10)
+ integer :: index
+
+ map_ptr => b
+
+  ! Auto attach should be applied here (per our reading of the spec),
+  ! attaching the pointer to the ref_ptee-mapped data. So internally
+  ! we implicitly apply the attach map type.
+ !$omp target enter data map(ref_ptee, to: map_ptr)
+ !$omp target enter data map(ref_ptr, to: map_ptr)
+
+  ! This should fault with a memory access error if we haven't attached
+  ! correctly above; if all went well it should run fine.
+ !$omp target map(to: index)
+ do index = 1, 10
+ map_ptr(index) = index
+ end do
+ !$omp end target
+
+  ! We don't care about the descriptor's contents, but we do want to
+  ! deallocate it (and only it) and then map the data back. The unusual
+  ! ordering tests that we can delete the descriptor separately and
+  ! still pull the data back.
+ !$omp target exit data map(ref_ptee, from: map_ptr)
+ !$omp target exit data map(ref_ptr, delete: map_ptr)
+
+ do index = 1, 10
+ if (map_ptr(index) /= index) then
+ print*, "Failed!"
+ stop 1
+ endif
+ end do
+
+ print*, "Passed!"
+end program
+
+! CHECK: Passed!
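
A small sketch of the same descriptor/pointee separation on a scalar
pointer, assuming -fopenmp-version=61, this PR's support, and that the
split modifiers extend to scalar pointers as well as array pointers:

program ref_split_scalar_sketch
  implicit none
  integer, pointer :: p
  integer, target :: t
  t = 5
  p => t
  ! Map the pointee on its own, then map and attach the descriptor.
  !$omp target enter data map(ref_ptee, to: p)
  !$omp target enter data map(ref_ptr, to: p)
  !$omp target
  p = p + 1
  !$omp end target
  ! Delete only the descriptor, then bring the data back.
  !$omp target exit data map(ref_ptr, delete: p)
  !$omp target exit data map(ref_ptee, from: p)
  print *, t ! expected: 6
end program ref_split_scalar_sketch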
diff --git a/offload/test/offloading/fortran/map_ref_ptr_ptee_test_2.f90 b/offload/test/offloading/fortran/map_ref_ptr_ptee_test_2.f90
new file mode 100644
index 0000000000000..b5fbfbe8df780
--- /dev/null
+++ b/offload/test/offloading/fortran/map_ref_ptr_ptee_test_2.f90
@@ -0,0 +1,47 @@
+! This checks that we can specify ref_ptee and ref_ptr without
+! encountering an error and correctly map data to and from the device.
+! It maps in a different order from map_ref_ptr_ptee_test_1.f90 to
+! verify we do not hit any odd runtime errors from the reordering.
+! REQUIRES: flang, amdgpu
+
+! RUN: %libomptarget-compile-fortran-generic -fopenmp-version=61
+! RUN: env LIBOMPTARGET_TREAT_ATTACH_AUTO_AS_ALWAYS=0 %libomptarget-run-generic 2>&1 | %fcheck-generic
+
+program main
+ implicit none
+ integer, pointer :: map_ptr(:)
+ integer, target :: b(10)
+ integer :: index
+
+ map_ptr => b
+
+ !$omp target enter data map(ref_ptr, to: map_ptr)
+ !$omp target enter data map(ref_ptee, to: map_ptr)
+
+  ! This should fault with a memory access error if we haven't attached
+  ! correctly above; if all went well it should run fine.
+ !$omp target map(to: index)
+ do index = 1, 10
+ map_ptr(index) = index
+ end do
+ !$omp end target
+
+  ! We don't care about the descriptor's contents, but we do want to
+  ! deallocate it (and only it) and then map the data back. The unusual
+  ! ordering tests that we can delete the descriptor separately and
+  ! still pull the data back.
+ !$omp target exit data map(ref_ptr, delete: map_ptr)
+ !$omp target exit data map(ref_ptee, from: map_ptr)
+
+ do index = 1, 10
+ if (map_ptr(index) /= index) then
+ print*, "Failed!"
+ stop 1
+ endif
+ end do
+
+ print*, "Passed!"
+end program
+
+! CHECK: Passed
\ No newline at end of file
diff --git a/offload/test/offloading/fortran/target-map-pointer-to-dtype-allocatable-member.f90 b/offload/test/offloading/fortran/target-map-pointer-to-dtype-allocatable-member.f90
index 8e1e68528943f..da9a8e70967f3 100644
--- a/offload/test/offloading/fortran/target-map-pointer-to-dtype-allocatable-member.f90
+++ b/offload/test/offloading/fortran/target-map-pointer-to-dtype-allocatable-member.f90
@@ -4,7 +4,8 @@
! directives
! REQUIRES: flang, amdgpu
-! RUN: %libomptarget-compile-fortran-run-and-check-generic
+! RUN: %libomptarget-compile-fortran-generic
+! RUN: env LIBOMPTARGET_TREAT_ATTACH_AUTO_AS_ALWAYS=1 %libomptarget-run-generic 2>&1 | %fcheck-generic
module dtype
type :: my_dtype
integer :: s, e