[flang-commits] [openmp] [mlir] [flang] [Flang][OpenMP] Initial mapping of Fortran pointers and allocatables for target devices (PR #71766)
via flang-commits
flang-commits at lists.llvm.org
Fri Nov 10 18:17:47 PST 2023
https://github.com/agozillon updated https://github.com/llvm/llvm-project/pull/71766
>From 2eb161daa744c5fb21d860f1afac88bacaedcc70 Mon Sep 17 00:00:00 2001
From: Andrew Gozillon <Andrew.Gozillon at amd.com>
Date: Wed, 8 Nov 2023 20:39:49 -0600
Subject: [PATCH 1/6] [MLIR][OpenMP] Add is_fortran_allocatable flag to
omp.map_info operation
This patch adds a flag to the omp.map_info operation that allows
users to indicate if the mapped variable is a Fortran allocatable
type (allocatable or pointer) with an associated type descriptor
that requires (or allows) specialised lowering.
---
mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 6 +++++-
mlir/test/Dialect/OpenMP/ops.mlir | 15 +++++++++++++++
2 files changed, 20 insertions(+), 1 deletion(-)
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 99ac5cfb7b9e922..9fcbffdb930eb05 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -1151,7 +1151,8 @@ def MapInfoOp : OpenMP_Op<"map_info", [AttrSizedOperandSegments]> {
Variadic<DataBoundsType>:$bounds, /* rank-0 to rank-{n-1} */
OptionalAttr<UI64Attr>:$map_type,
OptionalAttr<VariableCaptureKindAttr>:$map_capture_type,
- OptionalAttr<StrAttr>:$name);
+ OptionalAttr<StrAttr>:$name,
+ DefaultValuedAttr<BoolAttr, "false">:$is_fortran_allocatable);
let results = (outs AnyType:$omp_ptr);
let description = [{
@@ -1198,6 +1199,9 @@ def MapInfoOp : OpenMP_Op<"map_info", [AttrSizedOperandSegments]> {
- 'map_capture_type': Capture type for the variable e.g. this, byref, byvalue, byvla
this can affect how the variable is lowered.
- `name`: Holds the name of variable as specified in user clause (including bounds).
+ - 'is_fortran_allocatable': Indicates if the var_ptr variable is a fortran allocatable
+ type, e.g. a pointer or allocatable containing a descriptor mapping that wraps the
+ data and contains further information on the mapped variable.
}];
let assemblyFormat = [{
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index 4d88d9ac86fe16c..0c2a152845da73b 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -2082,3 +2082,18 @@ func.func @omp_targets_with_map_bounds(%arg0: !llvm.ptr, %arg1: !llvm.ptr) -> ()
return
}
+
+// CHECK-LABEL: omp_targets_is_allocatable
+// CHECK-SAME: (%[[ARG0:.*]]: !llvm.ptr, %[[ARG1:.*]]: !llvm.ptr)
+func.func @omp_targets_is_allocatable(%arg0: !llvm.ptr, %arg1: !llvm.ptr) -> () {
+ // CHECK: %[[MAP0:.*]] = omp.map_info var_ptr(%[[ARG0]] : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {is_fortran_allocatable = true, name = ""}
+ %mapv1 = omp.map_info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {is_fortran_allocatable = true, name = ""}
+ // CHECK: %[[MAP1:.*]] = omp.map_info var_ptr(%[[ARG1]] : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(to) capture(ByCopy) -> !llvm.ptr {name = ""}
+ %mapv2 = omp.map_info var_ptr(%arg1 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(to) capture(ByCopy) -> !llvm.ptr {is_fortran_allocatable = false, name = ""}
+ // CHECK: omp.target map_entries(%[[MAP0]] -> {{.*}}, %[[MAP1]] -> {{.*}} : !llvm.ptr, !llvm.ptr)
+ omp.target map_entries(%mapv1 -> %arg2, %mapv2 -> %arg3 : !llvm.ptr, !llvm.ptr) {
+ ^bb0(%arg2: !llvm.ptr, %arg3 : !llvm.ptr):
+ omp.terminator
+ }
+ return
+}
>From 7329207ca3ffaf22f2b0d46f1316e1812a2773de Mon Sep 17 00:00:00 2001
From: Andrew Gozillon <Andrew.Gozillon at amd.com>
Date: Wed, 8 Nov 2023 20:59:29 -0600
Subject: [PATCH 2/6] [Flang][OpenMP] Add specialized mapping of allocatable
and pointer types
This patch toggles the newly added is_fortran_allocatable parameter of the
omp.map_info operation when a mapped symbol is an allocatable or pointer,
this helps with later lowering of these mapped types.
It also seeks to use the symbol address, rather than the bounds
calculated base address as the mapped pointer. This is the
equivalent of giving a handle to the entire descriptor structure
instead of the internal pointer. This helps synchronize the implicit
and explicit mapping currently, but more importantly the original
symbol is currently what's lowered in the target region, the target
is lowered with the expectation of the full structure, not the internal
pointer, so maintaining this connection helps ease lowering.
It's also currently necessary, as during HLFIR/FIR -> LLVM dialect
lowering excess allocas are generated, per embox/box and other
box related operations, if we map the given base address (that
tracks the internal pointer) it is not always possible to track back
from this base address to the original allocation, which makes
remapping the SSA uses of the old value in the target region to
a new SSA value impossible, as they're now two very different
values. This can be exacerbated when there are multiple mappings
to the same allocatable variable across multiple target regions
or enter/exits.
---
flang/lib/Lower/OpenMP.cpp | 25 ++++-
flang/test/Lower/OpenMP/FIR/target.f90 | 2 +-
.../Lower/OpenMP/allocatable-array-bounds.f90 | 101 ++++++++++++++++++
flang/test/Lower/OpenMP/target.f90 | 2 +-
4 files changed, 124 insertions(+), 6 deletions(-)
create mode 100644 flang/test/Lower/OpenMP/allocatable-array-bounds.f90
diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp
index 00f16026d9b4bb8..45001b4e4b0c104 100644
--- a/flang/lib/Lower/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP.cpp
@@ -54,6 +54,9 @@ getOmpObjectSymbol(const Fortran::parser::OmpObject &ompObject) {
Fortran::parser::Unwrap<Fortran::parser::ArrayElement>(
designator)) {
sym = GetFirstName(arrayEle->base).symbol;
+ } else if (auto *structComp = Fortran::parser::Unwrap<
+ Fortran::parser::StructureComponent>(designator)) {
+ sym = structComp->component.symbol;
} else if (const Fortran::parser::Name *name =
Fortran::semantics::getDesignatorNameIfDataRef(
designator)) {
@@ -1710,7 +1713,7 @@ createMapInfoOp(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value baseAddr, std::stringstream &name,
mlir::SmallVector<mlir::Value> bounds, uint64_t mapType,
mlir::omp::VariableCaptureKind mapCaptureType, mlir::Type retTy,
- bool isVal = false) {
+ bool isAllocatable = false, bool isVal = false) {
mlir::Value val, varPtr, varPtrPtr;
mlir::TypeAttr varType;
@@ -1732,6 +1735,10 @@ createMapInfoOp(fir::FirOpBuilder &builder, mlir::Location loc,
builder.getIntegerAttr(builder.getIntegerType(64, false), mapType),
builder.getAttr<mlir::omp::VariableCaptureKindAttr>(mapCaptureType),
builder.getStringAttr(name.str()));
+
+ if (isAllocatable)
+ op.setIsFortranAllocatableAttr(builder.getBoolAttr(isAllocatable));
+
return op;
}
@@ -1800,6 +1807,13 @@ bool ClauseProcessor::processMap(
converter, firOpBuilder, semanticsContext, stmtCtx, ompObject,
clauseLocation, asFortran, bounds, treatIndexAsSection);
+ // Address of the descriptor, not the de-referenced data, currently
+ // required for later lowering to LLVM-IR
+ if (Fortran::semantics::IsAllocatableOrPointer(
+ *getOmpObjectSymbol(ompObject)))
+ baseAddr =
+ converter.getSymbolAddress(*getOmpObjectSymbol(ompObject));
+
// Explicit map captures are captured ByRef by default,
// optimisation passes may alter this to ByCopy or other capture
// types to optimise
@@ -1808,7 +1822,9 @@ bool ClauseProcessor::processMap(
static_cast<
std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
mapTypeBits),
- mlir::omp::VariableCaptureKind::ByRef, baseAddr.getType());
+ mlir::omp::VariableCaptureKind::ByRef, baseAddr.getType(),
+ Fortran::semantics::IsAllocatableOrPointer(
+ *getOmpObjectSymbol(ompObject)));
mapOperands.push_back(mapOp);
if (mapSymTypes)
@@ -2660,7 +2676,8 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
static_cast<
std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
mapFlag),
- captureKind, baseOp.getType());
+ captureKind, baseOp.getType(),
+ Fortran::semantics::IsAllocatableOrPointer(sym));
mapOperands.push_back(mapOp);
mapSymTypes.push_back(baseOp.getType());
@@ -2685,7 +2702,7 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
static_cast<
std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT),
- mlir::omp::VariableCaptureKind::ByCopy, val.getType(), true);
+ mlir::omp::VariableCaptureKind::ByCopy, val.getType(), false, true);
mapOperands.push_back(mapOp);
}
};
diff --git a/flang/test/Lower/OpenMP/FIR/target.f90 b/flang/test/Lower/OpenMP/FIR/target.f90
index d5a8fb242de9219..cd5326f3224e220 100644
--- a/flang/test/Lower/OpenMP/FIR/target.f90
+++ b/flang/test/Lower/OpenMP/FIR/target.f90
@@ -316,7 +316,7 @@ end subroutine omp_target_device_ptr
subroutine omp_target_device_addr
integer, pointer :: a
!CHECK: %[[VAL_0:.*]] = fir.alloca !fir.box<!fir.ptr<i32>> {bindc_name = "a", uniq_name = "_QFomp_target_device_addrEa"}
- !CHECK: %[[MAP:.*]] = omp.map_info var_ptr({{.*}}) map_clauses(tofrom) capture(ByRef) -> {{.*}} {name = "a"}
+ !CHECK: %[[MAP:.*]] = omp.map_info var_ptr({{.*}}) map_clauses(tofrom) capture(ByRef) -> {{.*}} {is_fortran_allocatable = true, name = "a"}
!CHECK: omp.target_data map_entries(%[[MAP]] : {{.*}}) use_device_addr(%[[VAL_0]] : !fir.ref<!fir.box<!fir.ptr<i32>>>) {
!$omp target data map(tofrom: a) use_device_addr(a)
!CHECK: ^bb0(%[[VAL_1:.*]]: !fir.ref<!fir.box<!fir.ptr<i32>>>):
diff --git a/flang/test/Lower/OpenMP/allocatable-array-bounds.f90 b/flang/test/Lower/OpenMP/allocatable-array-bounds.f90
new file mode 100644
index 000000000000000..aa2649632a87270
--- /dev/null
+++ b/flang/test/Lower/OpenMP/allocatable-array-bounds.f90
@@ -0,0 +1,101 @@
+!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s --check-prefixes HOST
+
+!HOST-LABEL: func.func @_QPread_write_section() {
+
+!HOST: %[[ALLOCA_1:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>> {bindc_name = "sp_read", uniq_name = "_QFread_write_sectionEsp_read"}
+!HOST: %[[DECLARE_1:.*]]:2 = hlfir.declare %[[ALLOCA_1]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFread_write_sectionEsp_read"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+
+!HOST: %[[ALLOCA_2:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>> {bindc_name = "sp_write", uniq_name = "_QFread_write_sectionEsp_write"}
+!HOST: %[[DECLARE_2:.*]]:2 = hlfir.declare %[[ALLOCA_2]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFread_write_sectionEsp_write"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+
+!HOST: %[[LOAD_1:.*]] = fir.load %[[DECLARE_1]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+!HOST: %[[LOAD_2:.*]] = fir.load %[[DECLARE_1]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+!HOST: %[[CONSTANT_1:.*]] = arith.constant 0 : index
+!HOST: %[[BOX_1:.*]]:3 = fir.box_dims %[[LOAD_2]], %[[CONSTANT_1]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+!HOST: %[[CONSTANT_2:.*]] = arith.constant 0 : index
+!HOST: %[[BOX_2:.*]]:3 = fir.box_dims %[[LOAD_1]], %[[CONSTANT_2]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+!HOST: %[[CONSTANT_3:.*]] = arith.constant 2 : index
+!HOST: %[[LB_1:.*]] = arith.subi %[[CONSTANT_3]], %[[BOX_1]]#0 : index
+!HOST: %[[CONSTANT_4:.*]] = arith.constant 5 : index
+!HOST: %[[UB_1:.*]] = arith.subi %[[CONSTANT_4]], %[[BOX_1]]#0 : index
+!HOST: %[[BOUNDS_1:.*]] = omp.bounds lower_bound(%[[LB_1]] : index) upper_bound(%[[UB_1]] : index) stride(%[[BOX_2]]#2 : index) start_idx(%[[BOX_1]]#0 : index) {stride_in_bytes = true}
+!HOST: %[[MAP_INFO_1:.*]] = omp.map_info var_ptr(%[[DECLARE_1]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS_1]]) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {is_fortran_allocatable = true, name = "sp_read(2:5)"}
+
+!HOST: %[[LOAD_3:.*]] = fir.load %[[DECLARE_2]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+!HOST: %[[LOAD_4:.*]] = fir.load %[[DECLARE_2]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+!HOST: %[[CONSTANT_5:.*]] = arith.constant 0 : index
+!HOST: %[[BOX_3:.*]]:3 = fir.box_dims %[[LOAD_4]], %[[CONSTANT_5]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+!HOST: %[[CONSTANT_6:.*]] = arith.constant 0 : index
+!HOST: %[[BOX_4:.*]]:3 = fir.box_dims %[[LOAD_3]], %[[CONSTANT_6]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+!HOST: %[[CONSTANT_7:.*]] = arith.constant 2 : index
+!HOST: %[[LB_2:.*]] = arith.subi %[[CONSTANT_7]], %[[BOX_3]]#0 : index
+!HOST: %[[CONSTANT_8:.*]] = arith.constant 5 : index
+!HOST: %[[UB_2:.*]] = arith.subi %[[CONSTANT_8]], %[[BOX_3]]#0 : index
+!HOST: %[[BOUNDS_2:.*]] = omp.bounds lower_bound(%[[LB_2]] : index) upper_bound(%[[UB_2]] : index) stride(%[[BOX_4]]#2 : index) start_idx(%[[BOX_3]]#0 : index) {stride_in_bytes = true}
+!HOST: %[[MAP_INFO_2:.*]] = omp.map_info var_ptr(%11#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS_2]]) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {is_fortran_allocatable = true, name = "sp_write(2:5)"}
+subroutine read_write_section()
+ integer, allocatable :: sp_read(:)
+ integer, allocatable :: sp_write(:)
+ allocate(sp_read(10))
+ allocate(sp_write(10))
+ sp_write = (/0,0,0,0,0,0,0,0,0,0/)
+ sp_read = (/1,2,3,4,5,6,7,8,9,10/)
+
+!$omp target map(tofrom:sp_read(2:5)) map(tofrom:sp_write(2:5))
+ do i = 2, 5
+ sp_write(i) = sp_read(i)
+ end do
+!$omp end target
+end subroutine read_write_section
+
+module assumed_allocatable_array_routines
+ contains
+
+!HOST-LABEL: func.func @_QMassumed_allocatable_array_routinesPassumed_shape_array(
+
+!HOST: %[[DECLARE:.*]]:2 = hlfir.declare %[[ARG:.*]] {fortran_attrs = #fir.var_attrs<allocatable, intent_inout>, uniq_name = "_QMassumed_allocatable_array_routinesFassumed_shape_arrayEarr_read_write"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+!HOST: %[[LOAD_1:.*]] = fir.load %[[DECLARE]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+!HOST: %[[LOAD_2:.*]] = fir.load %[[DECLARE]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+!HOST: %[[CONSTANT_1:.*]] = arith.constant 0 : index
+!HOST: %[[BOX_1:.*]]:3 = fir.box_dims %[[LOAD_2]], %[[CONSTANT_1]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+!HOST: %[[CONSTANT_2:.*]] = arith.constant 0 : index
+!HOST: %[[BOX_2:.*]]:3 = fir.box_dims %[[LOAD_1]], %[[CONSTANT_2]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+!HOST: %[[CONSTANT_3:.*]] = arith.constant 2 : index
+!HOST: %[[LB:.*]] = arith.subi %[[CONSTANT_3]], %[[BOX_1]]#0 : index
+!HOST: %[[CONSTANT_4:.*]] = arith.constant 5 : index
+!HOST: %[[UB:.*]] = arith.subi %[[CONSTANT_4]], %[[BOX_1]]#0 : index
+!HOST: %[[BOUNDS:.*]] = omp.bounds lower_bound(%[[LB]] : index) upper_bound(%[[UB]] : index) stride(%[[BOX_2]]#2 : index) start_idx(%[[BOX_1]]#0 : index) {stride_in_bytes = true}
+!HOST: %[[MAP_INFO:.*]] = omp.map_info var_ptr(%[[DECLARE]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {is_fortran_allocatable = true, name = "arr_read_write(2:5)"}
+subroutine assumed_shape_array(arr_read_write)
+ integer, allocatable, intent(inout) :: arr_read_write(:)
+
+!$omp target map(tofrom:arr_read_write(2:5))
+ do i = 2, 5
+ arr_read_write(i) = i
+ end do
+!$omp end target
+end subroutine assumed_shape_array
+end module assumed_allocatable_array_routines
+
+!HOST-LABEL: func.func @_QPcall_assumed_shape_and_size_array() {
+!HOST: %[[ALLOCA:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>> {bindc_name = "arr_read_write", uniq_name = "_QFcall_assumed_shape_and_size_arrayEarr_read_write"}
+!HOST: %[[DECLARE:.*]]:2 = hlfir.declare %[[ALLOCA]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFcall_assumed_shape_and_size_arrayEarr_read_write"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+!HOST: %[[ALLOCA_MEM:.*]] = fir.allocmem !fir.array<?xi32>, %{{.*}} {fir.must_be_heap = true, uniq_name = "_QFcall_assumed_shape_and_size_arrayEarr_read_write.alloc"}
+!HOST: %[[SHAPE:.*]] = fir.shape %{{.*}} : (index) -> !fir.shape<1>
+!HOST: %[[EMBOX:.*]] = fir.embox %[[ALLOCA_MEM]](%[[SHAPE]]) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+!HOST: fir.store %[[EMBOX]] to %[[DECLARE]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+!HOST: %[[LOAD:.*]] = fir.load %[[DECLARE]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+!HOST: %[[CONSTANT_1:.*]] = arith.constant 10 : index
+!HOST: %[[CONSTANT_2:.*]] = arith.constant 20 : index
+!HOST: %[[CONSTANT_3:.*]] = arith.constant 1 : index
+!HOST: %[[CONSTANT_4:.*]] = arith.constant 11 : index
+!HOST: %[[SHAPE:.*]] = fir.shape %[[CONSTANT_4]] : (index) -> !fir.shape<1>
+!HOST: %[[DESIGNATE:.*]] = hlfir.designate %[[LOAD]] (%[[CONSTANT_1]]:%[[CONSTANT_2]]:%[[CONSTANT_3]]) shape %[[SHAPE]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index, index, index, !fir.shape<1>) -> !fir.ref<!fir.array<11xi32>>
+!HOST: fir.call @_QPassumed_size_array(%[[DESIGNATE]]) fastmath<contract> : (!fir.ref<!fir.array<11xi32>>) -> ()
+subroutine call_assumed_shape_and_size_array
+ use assumed_allocatable_array_routines
+ integer, allocatable :: arr_read_write(:)
+ allocate(arr_read_write(20))
+ call assumed_size_array(arr_read_write(10:20))
+ deallocate(arr_read_write)
+end subroutine call_assumed_shape_and_size_array
diff --git a/flang/test/Lower/OpenMP/target.f90 b/flang/test/Lower/OpenMP/target.f90
index 86f456b847df90a..a409769ae4f8bf4 100644
--- a/flang/test/Lower/OpenMP/target.f90
+++ b/flang/test/Lower/OpenMP/target.f90
@@ -340,7 +340,7 @@ subroutine omp_target_device_addr
integer, pointer :: a
!CHECK: %[[VAL_0:.*]] = fir.alloca !fir.box<!fir.ptr<i32>> {bindc_name = "a", uniq_name = "_QFomp_target_device_addrEa"}
!CHECK: %[[VAL_0_DECL:.*]]:2 = hlfir.declare %0 {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFomp_target_device_addrEa"} : (!fir.ref<!fir.box<!fir.ptr<i32>>>) -> (!fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.ref<!fir.box<!fir.ptr<i32>>>)
- !CHECK: %[[MAP:.*]] = omp.map_info var_ptr({{.*}}) map_clauses(tofrom) capture(ByRef) -> {{.*}} {name = "a"}
+ !CHECK: %[[MAP:.*]] = omp.map_info var_ptr({{.*}}) map_clauses(tofrom) capture(ByRef) -> {{.*}} {is_fortran_allocatable = true, name = "a"}
!CHECK: omp.target_data map_entries(%[[MAP]] : {{.*}}) use_device_addr(%[[VAL_0_DECL]]#1 : !fir.ref<!fir.box<!fir.ptr<i32>>>) {
!$omp target data map(tofrom: a) use_device_addr(a)
!CHECK: ^bb0(%[[VAL_1:.*]]: !fir.ref<!fir.box<!fir.ptr<i32>>>):
>From bf004e9777635c9294378693b36efd166fafa20d Mon Sep 17 00:00:00 2001
From: Andrew Gozillon <Andrew.Gozillon at amd.com>
Date: Wed, 8 Nov 2023 21:14:17 -0600
Subject: [PATCH 3/6] [Flang][FIR][CodeGen] Specialised lowering of
omp.map_info to maintain descriptor type for allocatables
The lowering of mapped values from OpenMP to LLVM-IR requires
the underlying type to generate the appropriate instructions for
offloading data to devices.
With the move to opaque pointers we now must maintain a separate
MLIR type inside of the omp.map_info which contains the underlying
type beneath the opaque pointers. Currently, the descriptor types
for allocatables and pointers (and other types possibly) will devolve
to opaque pointers using the default omp.map_info lowering inside
of the OpenMP dialect as they go through the more generalized
FIR lowering process.
This patch seeks to fix that by creating a specialised lowering for
omp.map_info inside of CodeGen.cpp that uses a type lowering
similar to the method that the specialised box/embox operations
use. This allows the appropriate type lowering for descriptors.
This is unfortunately a required addition to CodeGen.cpp as we have
no access to the specific FIR type lowering we require in the OpenMP
dialect lowering as FIR/HLFIR is not part of the MLIR project like
OpenMP currently is.
---
flang/lib/Optimizer/CodeGen/CodeGen.cpp | 45 +++++++++++++++++++++
flang/test/Lower/OpenMP/allocatable-map.f90 | 16 ++++++++
2 files changed, 61 insertions(+)
create mode 100644 flang/test/Lower/OpenMP/allocatable-map.f90
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 9eabacdc818f6f4..d0ac9ea262be81e 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -417,6 +417,46 @@ class FIROpAndTypeConversion : public FIROpConversion<FromOp> {
};
} // namespace
+namespace {
+// FIR Op specific conversion for MapInfoOp that overwrites the default OpenMP
+// Dialect lowering, this allows FIR specific lowering of types, required for
+// descriptors of allocatables currently.
+struct MapInfoOpConversion : public FIROpConversion<mlir::omp::MapInfoOp> {
+ using FIROpConversion::FIROpConversion;
+
+ mlir::LogicalResult
+ matchAndRewrite(mlir::omp::MapInfoOp curOp, OpAdaptor adaptor,
+ mlir::ConversionPatternRewriter &rewriter) const override {
+ const mlir::TypeConverter *converter = getTypeConverter();
+
+ llvm::SmallVector<mlir::Type> resTypes;
+ if (failed(converter->convertTypes(curOp->getResultTypes(), resTypes)))
+ return mlir::failure();
+
+ // Copy attributes of the curOp except for the typeAttr which should
+ // be converted
+ llvm::SmallVector<mlir::NamedAttribute> newAttrs;
+ for (mlir::NamedAttribute attr : curOp->getAttrs()) {
+ if (auto typeAttr = mlir::dyn_cast<mlir::TypeAttr>(attr.getValue())) {
+ mlir::Type newAttr;
+ if (curOp.getIsFortranAllocatable()) {
+ newAttr = this->getBoxTypePair(typeAttr.getValue()).llvm;
+ } else {
+ newAttr = converter->convertType(typeAttr.getValue());
+ }
+ newAttrs.emplace_back(attr.getName(), mlir::TypeAttr::get(newAttr));
+ } else {
+ newAttrs.push_back(attr);
+ }
+ }
+
+ rewriter.replaceOpWithNewOp<mlir::omp::MapInfoOp>(
+ curOp, resTypes, adaptor.getOperands(), newAttrs);
+ return mlir::success();
+ }
+};
+} // namespace
+
namespace {
/// Lower `fir.address_of` operation to `llvm.address_of` operation.
struct AddrOfOpConversion : public FIROpConversion<fir::AddrOfOp> {
@@ -3828,6 +3868,11 @@ class FIRToLLVMLowering
options);
mlir::populateFuncToLLVMConversionPatterns(typeConverter, pattern);
mlir::populateOpenMPToLLVMConversionPatterns(typeConverter, pattern);
+
+ // Insert OpenMP FIR specific conversion patterns that override OpenMP
+ // dialect default conversion patterns.
+ pattern.insert<MapInfoOpConversion>(typeConverter, options);
+
mlir::arith::populateArithToLLVMConversionPatterns(typeConverter, pattern);
mlir::cf::populateControlFlowToLLVMConversionPatterns(typeConverter,
pattern);
diff --git a/flang/test/Lower/OpenMP/allocatable-map.f90 b/flang/test/Lower/OpenMP/allocatable-map.f90
new file mode 100644
index 000000000000000..7d10ff181d76e05
--- /dev/null
+++ b/flang/test/Lower/OpenMP/allocatable-map.f90
@@ -0,0 +1,16 @@
+!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s --check-prefixes="HLFIRDIALECT"
+!RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | fir-opt --fir-to-llvm-ir | FileCheck %s --check-prefixes="LLVMOMPDIALECT"
+
+!LLVMOMPDIALECT: %[[ALLOCA:.*]] = llvm.alloca {{.*}} x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {bindc_name = "point"} : (i64) -> !llvm.ptr
+!LLVMOMPDIALECT: %[[MAP:.*]] = omp.map_info var_ptr(%[[ALLOCA]] : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(implicit, tofrom) capture(ByRef) -> !llvm.ptr {is_fortran_allocatable = true, name = "point"}
+!LLVMOMPDIALECT: omp.target map_entries({{.*}}, %[[MAP]] -> {{.*}} : {{.*}}, !llvm.ptr) {
+
+!HLFIRDIALECT: %[[POINTER:.*]]:2 = hlfir.declare %0 {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFpointer_routineEpoint"} : (!fir.ref<!fir.box<!fir.ptr<i32>>>) -> (!fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.ref<!fir.box<!fir.ptr<i32>>>)
+!HLFIRDIALECT: %[[POINTER_MAP:.*]] = omp.map_info var_ptr(%[[POINTER]]#1 : !fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.box<!fir.ptr<i32>>) map_clauses(implicit, tofrom) capture(ByRef) -> !fir.ref<!fir.box<!fir.ptr<i32>>> {is_fortran_allocatable = true, name = "point"}
+!HLFIRDIALECT: omp.target map_entries({{.*}}, %[[POINTER_MAP]] -> {{.*}} : {{.*}}, !fir.ref<!fir.box<!fir.ptr<i32>>>) {
+subroutine pointer_routine()
+ integer, pointer :: point
+!$omp target map(tofrom:pointer)
+ point = 1
+!$omp end target
+end subroutine pointer_routine
>From 9c03bcd3f5d4b2c7a8fb2c4e42eb5ddbe32e5f2d Mon Sep 17 00:00:00 2001
From: Andrew Gozillon <Andrew.Gozillon at amd.com>
Date: Wed, 8 Nov 2023 21:34:17 -0600
Subject: [PATCH 4/6] [OpenMP][MLIR][Flang] Add specialised offloading lowering
of fortran allocatable types to OpenMPToLLVMIRTranslation
This patch seeks to add a specialised lowering of mapping
of Fortran allocatables/pointer types to the OpenMP -> LLVM-IR
lowering phase of the OpenMP dialect.
These types come wrapped in a structure (descriptor) which
currently must be mapped alongside the data (a pointer inside
of the descriptor structure) they hold. This descriptor structure
contains information such as type size of elements, upper bounds,
lower bounds, and other information. The current target lowering
will lower the region with the expectation that this descriptor is
mapped across to the kernel and useable inside of the kernel to
perform various instructions. I believe future OpenMP wording
is also leaning towards explicit mapping of descriptors for Fortran
allocatables in any case.
So we must lower both the descriptor and the internal pointer that
points to the user specified data that they wish to access on device.
This patch does so utilising a similar mapping to how Clang maps
structures containing pointer members, e.g.
map(tofrom: descriptor, descriptor->data)
Where the descriptors data is mapped as a member_of the larger
structure. The offset and size to be offloaded are currently
calculated by the data stored in the descriptor.
---
.../Driver/OpenMP/map-types-and-sizes.f90 | 23 +-
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 236 ++++++++++-
...target-fortran-allocatable-types-host.mlir | 374 ++++++++++++++++++
3 files changed, 621 insertions(+), 12 deletions(-)
create mode 100644 mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir
diff --git a/flang/test/Driver/OpenMP/map-types-and-sizes.f90 b/flang/test/Driver/OpenMP/map-types-and-sizes.f90
index e4f429e479af158..aa7e0042860296b 100644
--- a/flang/test/Driver/OpenMP/map-types-and-sizes.f90
+++ b/flang/test/Driver/OpenMP/map-types-and-sizes.f90
@@ -22,7 +22,6 @@ subroutine mapType_array
!$omp end target
end subroutine mapType_array
-!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [1 x i64] [i64 8]
!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [1 x i64] [i64 547]
subroutine mapType_ptr
integer, pointer :: a
@@ -31,6 +30,16 @@ subroutine mapType_ptr
!$omp end target
end subroutine mapType_ptr
+!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [1 x i64] [i64 547]
+subroutine mapType_allocatable
+ integer, allocatable :: a
+ allocate(a)
+ !$omp target
+ a = 10
+ !$omp end target
+ deallocate(a)
+end subroutine mapType_allocatable
+
!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [2 x i64] [i64 8, i64 4]
!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [2 x i64] [i64 544, i64 800]
subroutine mapType_c_ptr
@@ -50,3 +59,15 @@ subroutine mapType_char
a = 'b'
!$omp end target
end subroutine mapType_char
+
+!CHECK-LABEL: define void @maptype_ptr_() {
+!CHECK: %[[DESC_ELE_SIZE:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %{{.*}}, i64 0, i32 1
+!CHECK: %[[DESC_ELE_SIZE_LOAD:.*]] = load i64, ptr %[[DESC_ELE_SIZE]], align 8
+!CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [1 x i64], ptr %.offload_sizes, i32 0, i32 0
+!CHECK: store i64 %[[DESC_ELE_SIZE_LOAD]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
+
+!CHECK-LABEL: define void @maptype_allocatable_() {
+!CHECK: %[[DESC_ELE_SIZE:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %{{.*}}, i64 0, i32 1
+!CHECK: %[[DESC_ELE_SIZE_LOAD:.*]] = load i64, ptr %[[DESC_ELE_SIZE]], align 8
+!CHECK: %[[OFFLOAD_SIZE_ARR_1:.*]] = getelementptr inbounds [1 x i64], ptr %.offload_sizes, i32 0, i32 0
+!CHECK: store i64 %[[DESC_ELE_SIZE_LOAD]], ptr %[[OFFLOAD_SIZE_ARR_1]], align 8
\ No newline at end of file
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index d9ab785a082835d..ab57b5a5f5cc83f 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1676,7 +1676,8 @@ uint64_t getArrayElementSizeInBits(LLVM::LLVMArrayType arrTy, DataLayout &dl) {
// This function is somewhat equivalent to Clang's getExprTypeSize inside of
// CGOpenMPRuntime.cpp.
llvm::Value *getSizeInBytes(DataLayout &dl, const mlir::Type &type,
- Operation *clauseOp, llvm::IRBuilderBase &builder,
+ Operation *clauseOp, llvm::Value *basePointer,
+ llvm::Type *baseType, llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation) {
// utilising getTypeSizeInBits instead of getTypeSize as getTypeSize gives
// the size in inconsistent byte or bit format.
@@ -1710,6 +1711,24 @@ llvm::Value *getSizeInBytes(DataLayout &dl, const mlir::Type &type,
}
}
+ // The size in bytes x number of elements, for allocatables, the
+ // underlying element type is stored inside of the descriptor, we
+ // can retrieve and use it directly here. However, it may also be
+ // possible to use the BoundsOp's getStride, which points to the
+ // same field, however, unsure if the stride will change based
+ // on a user specified stride, whereas, the element size
+ // of the descriptor should never change and can be used to
+ // calculate other things.
+ if (memberClause.getIsFortranAllocatable()) {
+ llvm::Value *memberEleByteSize = builder.CreateLoad(
+ builder.getInt64Ty(),
+ builder.CreateGEP(baseType, basePointer,
+ std::vector<llvm::Value *>{builder.getInt64(0),
+ builder.getInt32(1)},
+ "element_size"));
+ return builder.CreateMul(elementCount, memberEleByteSize);
+ }
+
// The size in bytes x number of elements, the sizeInBytes stored is
// the underyling types size, e.g. if ptr<i32>, it'll be the i32's
// size, so we do some on the fly runtime math to get the size in
@@ -1718,11 +1737,38 @@ llvm::Value *getSizeInBytes(DataLayout &dl, const mlir::Type &type,
return builder.CreateMul(elementCount,
builder.getInt64(underlyingTypeSzInBits / 8));
}
+
+ // The case for allocatables that are not arrays, could perhaps treat these
+ // as just pointer size
+ if (memberClause.getBounds().empty() &&
+ memberClause.getIsFortranAllocatable()) {
+ return builder.CreateLoad(
+ builder.getInt64Ty(),
+ builder.CreateGEP(baseType, basePointer,
+ std::vector<llvm::Value *>{builder.getInt64(0),
+ builder.getInt32(1)},
+ "element_size"));
+ }
}
return builder.getInt64(underlyingTypeSzInBits / 8);
}
+// Get the underlying LLVM type, this will bypass the pointer and
+// access the underlying type. Which bypasses llvm's opaque pointers
+// to get the underlying type via MLIR.
+llvm::Type *getLLVMIRType(mlir::Type inputType,
+ LLVM::ModuleTranslation &moduleTranslation) {
+ llvm::Type *type = moduleTranslation.convertType(inputType);
+ if (auto pointerType =
+ llvm::dyn_cast<mlir::omp::PointerLikeType>(inputType)) {
+ if (auto eleType = pointerType.getElementType()) {
+ type = moduleTranslation.convertType(eleType);
+ }
+ }
+ return type;
+}
+
void collectMapDataFromMapOperands(MapInfoData &mapData,
llvm::SmallVectorImpl<Value> &mapOperands,
LLVM::ModuleTranslation &moduleTranslation,
@@ -1747,14 +1793,16 @@ void collectMapDataFromMapOperands(MapInfoData &mapData,
mapData.BasePointers.push_back(mapData.OriginalValue.back());
}
+ mapData.BaseType.push_back(getLLVMIRType(mapOp.getVal()
+ ? mapOp.getVal().getType()
+ : mapOp.getVarType().value(),
+ moduleTranslation));
mapData.Sizes.push_back(
getSizeInBytes(dl,
mapOp.getVal() ? mapOp.getVal().getType()
: mapOp.getVarType().value(),
- mapOp, builder, moduleTranslation));
- mapData.BaseType.push_back(moduleTranslation.convertType(
- mapOp.getVal() ? mapOp.getVal().getType()
- : mapOp.getVarType().value()));
+ mapOp, mapData.BasePointers.back(),
+ mapData.BaseType.back(), builder, moduleTranslation));
mapData.MapClause.push_back(mapOp.getOperation());
mapData.Types.push_back(
llvm::omp::OpenMPOffloadMappingFlags(mapOp.getMapType().value()));
@@ -1766,6 +1814,155 @@ void collectMapDataFromMapOperands(MapInfoData &mapData,
}
}
+// Fortran pointers and allocatables come with descriptor information
+// alongside a pointer to the data itself, currently we map this together as
+// we would a regular C/C++ structure with a pointer member that's been
+// specified explicitly as a map argument. This is in part due to the
+// lowered target region from Fortran having expectations that it's
+// dealing with this descriptor and the pointer or allocatable information
+// to perform runtime checks and calculations. It may be possible to
+// optimise this out and just map the data on its own in the future,
+// however, future specification iterations appear to be moving toward
+// explicit wording of how to map the descriptor so it is likely unnecessary
+// to do so (and this function may need slight revising at that point).
+//
+// The descriptor information (pointed to by the BoundsOp of the map or
+// accessible through the MapInfoData entries BasePointer) allows us to take
+// liberties with the mapping and assume certain things as it contains dynamic
+// information for the Fortran allocatable or pointer mapping, such as its
+// runtime size, and type. However, this function can be more generally
+// applicable to class, structure or derived type mappings with some alterations
+// when we support those, it is essentially a specialized lowering of those
+// types for allocatables (descriptor being the container, the data being the
+// member).
+static void generateAllocatablesMapInfo(
+ LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder,
+ llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl,
+ llvm::OpenMPIRBuilder::MapInfosTy &combinedInfo, MapInfoData &mapData,
+ uint64_t mapDataIndex, bool isTargetParams) {
+ auto mapClauseInfo =
+ mlir::dyn_cast<mlir::omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
+
+ ////////// First Descriptor Structure Segment //////////
+ combinedInfo.Types.emplace_back(
+ isTargetParams
+ ? llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM
+ : llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE);
+ combinedInfo.DevicePointers.emplace_back(
+ llvm::OpenMPIRBuilder::DeviceInfoTy::None);
+ combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
+ mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
+ combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
+ combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
+
+ llvm::Value *lowAddr = builder.CreatePointerCast(
+ mapData.Pointers[mapDataIndex], builder.getPtrTy());
+ llvm::Value *highAddr = builder.CreatePointerCast(
+ builder.CreateConstGEP1_32(mapData.BaseType[mapDataIndex],
+ mapData.Pointers[mapDataIndex], 1),
+ builder.getPtrTy());
+ llvm::Value *size = builder.CreateIntCast(
+ builder.CreatePtrDiff(builder.getInt8Ty(), highAddr, lowAddr),
+ builder.getInt64Ty(),
+ /*isSigned=*/false);
+ combinedInfo.Sizes.push_back(size);
+
+ ////////// Second Descriptor Structure Segment //////////
+ llvm::omp::OpenMPOffloadMappingFlags mapFlag =
+ llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
+ if (isTargetParams)
+ mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
+
+ llvm::omp::OpenMPOffloadMappingFlags memberOfFlag =
+ ompBuilder.getMemberOfFlag(combinedInfo.BasePointers.size() - 1);
+ ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
+
+ combinedInfo.Types.emplace_back(mapFlag);
+ combinedInfo.DevicePointers.emplace_back(
+ llvm::OpenMPIRBuilder::DeviceInfoTy::None);
+ combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
+ mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
+ combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
+ combinedInfo.Pointers.emplace_back(mapData.Pointers[mapDataIndex]);
+ combinedInfo.Sizes.emplace_back(builder.getInt64(
+ moduleTranslation.getLLVMModule()->getDataLayout().getTypeSizeInBits(
+ mapData.BaseType[mapDataIndex]) /
+ 8));
+
+ ///// Mapping of the data, that the descriptor pointer points to/ /////
+ // Same MemberOfFlag to indicate its link with parent and other members
+ // of, and we flag that it's part of a pointer and object coupling.
+ mapFlag =
+ llvm::omp::OpenMPOffloadMappingFlags(mapClauseInfo.getMapType().value());
+
+ // TODO: Is target_param always required? seems unnecessary for enter + exit
+ mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
+ ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
+ mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
+ combinedInfo.Types.emplace_back(mapFlag);
+ combinedInfo.DevicePointers.emplace_back(
+ llvm::OpenMPIRBuilder::DeviceInfoTy::None);
+ combinedInfo.Names.emplace_back(LLVM::createMappingInformation(
+ mapData.MapClause[mapDataIndex]->getLoc(), ompBuilder));
+
+ llvm::Value *memberBase = builder.CreateInBoundsGEP(
+ mapData.BaseType[mapDataIndex], mapData.BasePointers[mapDataIndex],
+ std::vector<llvm::Value *>{builder.getInt64(0), builder.getInt32(0)},
+ "member_base");
+
+ combinedInfo.BasePointers.emplace_back(memberBase);
+
+ llvm::Value *offsetAddress = nullptr;
+ if (!mapClauseInfo.getBounds().empty()) {
+ llvm::Value *memberEleByteSize = builder.CreateLoad(
+ builder.getInt64Ty(),
+ builder.CreateGEP(mapData.BaseType[mapDataIndex],
+ mapData.BasePointers[mapDataIndex],
+ std::vector<llvm::Value *>{builder.getInt64(0),
+ builder.getInt32(1)},
+ "size"));
+
+ std::vector<llvm::Value *> dimensionIndexSizeOffset{memberEleByteSize};
+ for (size_t i = 1; i < mapClauseInfo.getBounds().size(); ++i) {
+ llvm::Value *ubExtent = builder.CreateLoad(
+ builder.getInt64Ty(),
+ builder.CreateGEP(mapData.BaseType[mapDataIndex],
+ mapData.BasePointers[mapDataIndex],
+ std::vector<llvm::Value *>{
+ builder.getInt64(0), builder.getInt32(7),
+ builder.getInt32(i), builder.getInt32(1)},
+ "ub_extent"));
+ dimensionIndexSizeOffset.push_back(
+ builder.CreateMul(ubExtent, dimensionIndexSizeOffset[i - 1]));
+ }
+
+ for (int i = mapClauseInfo.getBounds().size() - 1; i >= 0; --i) {
+ if (auto boundOp = mlir::dyn_cast_if_present<mlir::omp::DataBoundsOp>(
+ mapClauseInfo.getBounds()[i].getDefiningOp())) {
+ if (!offsetAddress)
+ offsetAddress = builder.CreateMul(
+ moduleTranslation.lookupValue(boundOp.getLowerBound()),
+ dimensionIndexSizeOffset[i]);
+ else
+ offsetAddress = builder.CreateAdd(
+ offsetAddress, builder.CreateMul(moduleTranslation.lookupValue(
+ boundOp.getLowerBound()),
+ dimensionIndexSizeOffset[i]));
+ }
+ }
+ }
+
+ llvm::Value *loadMember =
+ builder.CreateLoad(memberBase->getType(), memberBase);
+ llvm::Value *memberIdx = builder.CreateGEP(
+ builder.getInt8Ty(), loadMember,
+ std::vector<llvm::Value *>{offsetAddress ? offsetAddress
+ : builder.getInt64(0)},
+ "member_idx");
+ combinedInfo.Pointers.emplace_back(memberIdx);
+ combinedInfo.Sizes.emplace_back(mapData.Sizes[mapDataIndex]);
+}
+
// Generate all map related information and fill the combinedInfo.
static void genMapInfos(llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation,
@@ -1792,6 +1989,24 @@ static void genMapInfos(llvm::IRBuilderBase &builder,
// utilise the size from any component of MapInfoData, if we can't
// something is missing from the initial MapInfoData construction.
for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
+ auto mapInfoOp = mlir::dyn_cast<mlir::omp::MapInfoOp>(mapData.MapClause[i]);
+ bool isImplicit =
+ mapInfoOp.getMapType().value() &
+ static_cast<
+ std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
+ llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
+
+ // TODO: Look into more thoroughly handling implicit semantics, this mimics
+ // what Clang currently does with structures, which is map the structure and
+ // not the internals i.e. the descriptor without the pointer itself, so not
+ // a deep copy, which may be incorrect for allocatables/pointers, but allows
+ // explicit mapping and initial enter/exit handling
+ if (!isImplicit && mapInfoOp.getIsFortranAllocatable()) {
+ generateAllocatablesMapInfo(moduleTranslation, builder, *ompBuilder, dl,
+ combinedInfo, mapData, i, isTargetParams);
+ continue;
+ }
+
// Declare Target Mappings are excluded from being marked as
// OMP_MAP_TARGET_PARAM as they are not passed as parameters, they're marked
// with OMP_MAP_PTR_AND_OBJ instead.
@@ -1801,12 +2016,11 @@ static void genMapInfos(llvm::IRBuilderBase &builder,
else if (isTargetParams)
mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
- if (auto mapInfoOp = dyn_cast<mlir::omp::MapInfoOp>(mapData.MapClause[i]))
- if (mapInfoOp.getMapCaptureType().value() ==
- mlir::omp::VariableCaptureKind::ByCopy &&
- !(mapInfoOp.getVarType().has_value() &&
- mapInfoOp.getVarType()->isa<LLVM::LLVMPointerType>()))
- mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
+ if (mapInfoOp.getMapCaptureType().value() ==
+ mlir::omp::VariableCaptureKind::ByCopy &&
+ !(mapInfoOp.getVarType().has_value() &&
+ mapInfoOp.getVarType()->isa<LLVM::LLVMPointerType>()))
+ mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_LITERAL;
combinedInfo.BasePointers.emplace_back(mapData.BasePointers[i]);
combinedInfo.Pointers.emplace_back(mapData.Pointers[i]);
diff --git a/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir b/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir
new file mode 100644
index 000000000000000..09f1f22a7209e1a
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir
@@ -0,0 +1,374 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// This test checks the offload sizes, map types and base pointers and pointers
+// provided to the OpenMP kernel argument structure are correct when lowering
+// to LLVM-IR from MLIR when the fortran allocatables flag is switched on and
+// a fortran allocatable descriptor type is provided alongside the omp.map_info,
+// the test utilises mapping of array sections, full arrays and individual
+// allocated scalars.
+
+module attributes {omp.is_target_device = false} {
+ llvm.func @free(!llvm.ptr)
+ llvm.func @malloc(i64) -> !llvm.ptr
+ llvm.func @_QQmain() attributes {bindc_name = "main"} {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %2 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %3 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %4 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %5 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %6 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %7 = llvm.mlir.constant(10 : index) : i64
+ %8 = llvm.mlir.constant(5 : index) : i64
+ %9 = llvm.mlir.constant(2 : index) : i64
+ %10 = llvm.mlir.constant(1 : index) : i64
+ %11 = llvm.mlir.constant(0 : index) : i64
+ %12 = llvm.mlir.addressof @_QFEfull_arr : !llvm.ptr
+ %13 = llvm.mlir.constant(1 : i64) : i64
+ %14 = llvm.alloca %13 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {bindc_name = "scalar"} : (i64) -> !llvm.ptr
+ %15 = llvm.mlir.zero : !llvm.ptr
+ %16 = llvm.mlir.constant(27 : i32) : i32
+ %17 = llvm.getelementptr %15[1] : (!llvm.ptr) -> !llvm.ptr, f32
+ %18 = llvm.ptrtoint %17 : !llvm.ptr to i64
+ %19 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %20 = llvm.insertvalue %18, %19[1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %21 = llvm.mlir.constant(20180515 : i32) : i32
+ %22 = llvm.insertvalue %21, %20[2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %23 = llvm.mlir.constant(0 : i32) : i32
+ %24 = llvm.trunc %23 : i32 to i8
+ %25 = llvm.insertvalue %24, %22[3] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %26 = llvm.trunc %16 : i32 to i8
+ %27 = llvm.insertvalue %26, %25[4] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %28 = llvm.mlir.constant(2 : i32) : i32
+ %29 = llvm.trunc %28 : i32 to i8
+ %30 = llvm.insertvalue %29, %27[5] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %31 = llvm.insertvalue %24, %30[6] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %32 = llvm.insertvalue %15, %31[0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ llvm.store %32, %6 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+ %33 = llvm.load %6 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ llvm.store %33, %14 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+ %34 = llvm.mlir.addressof @_QFEsect_arr : !llvm.ptr
+ %35 = llvm.mul %18, %7 : i64
+ %36 = llvm.call @malloc(%35) {operandSegmentSizes = array<i32: 0, 1>, uniq_name = "_QFEfull_arr.alloc"} : (i64) -> !llvm.ptr
+ %37 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %38 = llvm.insertvalue %18, %37[1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %39 = llvm.insertvalue %21, %38[2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %40 = llvm.trunc %0 : i32 to i8
+ %41 = llvm.insertvalue %40, %39[3] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %42 = llvm.insertvalue %26, %41[4] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %43 = llvm.insertvalue %29, %42[5] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %44 = llvm.insertvalue %24, %43[6] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %45 = llvm.insertvalue %13, %44[7, 0, 0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %46 = llvm.insertvalue %7, %45[7, 0, 1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %47 = llvm.insertvalue %18, %46[7, 0, 2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %48 = llvm.insertvalue %36, %47[0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ llvm.store %48, %5 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, !llvm.ptr
+ %49 = llvm.load %5 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ llvm.store %49, %12 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, !llvm.ptr
+ %50 = llvm.getelementptr %15[1] : (!llvm.ptr) -> !llvm.ptr, i32
+ %51 = llvm.ptrtoint %50 : !llvm.ptr to i64
+ %52 = llvm.mul %51, %7 : i64
+ %53 = llvm.call @malloc(%52) {operandSegmentSizes = array<i32: 0, 1>, uniq_name = "_QFEsect_arr.alloc"} : (i64) -> !llvm.ptr
+ %54 = llvm.mlir.constant(9 : i32) : i32
+ %55 = llvm.insertvalue %51, %37[1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %56 = llvm.insertvalue %21, %55[2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %57 = llvm.insertvalue %40, %56[3] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %58 = llvm.trunc %54 : i32 to i8
+ %59 = llvm.insertvalue %58, %57[4] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %60 = llvm.insertvalue %29, %59[5] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %61 = llvm.insertvalue %24, %60[6] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %62 = llvm.insertvalue %13, %61[7, 0, 0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %63 = llvm.insertvalue %7, %62[7, 0, 1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %64 = llvm.insertvalue %51, %63[7, 0, 2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %65 = llvm.insertvalue %53, %64[0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ llvm.store %65, %4 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, !llvm.ptr
+ %66 = llvm.load %4 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ llvm.store %66, %34 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, !llvm.ptr
+ %67 = llvm.call @malloc(%18) {operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFEscalar.alloc"} : (i64) -> !llvm.ptr
+ %68 = llvm.insertvalue %67, %31[0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ llvm.store %68, %3 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+ %69 = llvm.load %3 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ llvm.store %69, %14 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+ %70 = llvm.load %12 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ llvm.store %70, %2 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, !llvm.ptr
+ %71 = llvm.getelementptr %2[0, 7, %11, 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %72 = llvm.load %71 : !llvm.ptr -> i64
+ %73 = llvm.getelementptr %2[0, 7, %11, 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %74 = llvm.load %73 : !llvm.ptr -> i64
+ %75 = llvm.getelementptr %2[0, 7, %11, 2] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %76 = llvm.load %75 : !llvm.ptr -> i64
+ %77 = llvm.sub %74, %10 : i64
+ %78 = omp.bounds lower_bound(%11 : i64) upper_bound(%77 : i64) stride(%76 : i64) start_idx(%72 : i64) {stride_in_bytes = true}
+ %79 = omp.map_info var_ptr(%12 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) bounds(%78) -> !llvm.ptr {is_fortran_allocatable = true, name = "full_arr"}
+ %80 = llvm.load %34 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ llvm.store %80, %1 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, !llvm.ptr
+ %81 = llvm.getelementptr %1[0, 7, %11, 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %82 = llvm.load %81 : !llvm.ptr -> i64
+ %83 = llvm.getelementptr %1[0, 7, %11, 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %84 = llvm.getelementptr %1[0, 7, %11, 2] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %85 = llvm.load %84 : !llvm.ptr -> i64
+ %86 = llvm.sub %9, %82 : i64
+ %87 = llvm.sub %8, %82 : i64
+ %88 = omp.bounds lower_bound(%86 : i64) upper_bound(%87 : i64) stride(%85 : i64) start_idx(%82 : i64) {stride_in_bytes = true}
+ %89 = omp.map_info var_ptr(%34 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) bounds(%88) -> !llvm.ptr {is_fortran_allocatable = true, name = "sect_arr(2:5)"}
+ %90 = omp.map_info var_ptr(%14 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {is_fortran_allocatable = true, name = "scalar"}
+ omp.target map_entries(%79 -> %arg0, %89 -> %arg1, %90 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+ ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr):
+ %91 = llvm.mlir.constant(1 : i32) : i32
+ %92 = llvm.alloca %91 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %93 = llvm.alloca %91 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %94 = llvm.alloca %91 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %95 = llvm.alloca %91 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %96 = llvm.mlir.constant(true) : i1
+ %97 = llvm.mlir.constant(false) : i1
+ %98 = llvm.mlir.constant(0 : i64) : i64
+ %99 = llvm.mlir.constant(3 : i64) : i64
+ %100 = llvm.mlir.constant(1 : i64) : i64
+ %101 = llvm.mlir.constant(0 : index) : i64
+ %102 = llvm.mlir.constant(1.000000e+00 : f32) : f32
+ %103 = llvm.load %arg0 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ llvm.store %103, %95 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, !llvm.ptr
+ %104 = llvm.getelementptr %95[0, 7, %101, 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %105 = llvm.load %104 : !llvm.ptr -> i64
+ %106 = llvm.getelementptr %95[0, 7, %101, 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %107 = llvm.getelementptr %95[0, 7, %101, 2] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %108 = llvm.getelementptr %95[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %109 = llvm.load %108 : !llvm.ptr -> !llvm.ptr
+ %110 = llvm.sub %100, %105 : i64
+ %111 = llvm.getelementptr %109[%110] : (!llvm.ptr, i64) -> !llvm.ptr, f32
+ llvm.store %102, %111 : f32, !llvm.ptr
+ %112 = llvm.load %arg1 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ llvm.store %112, %94 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, !llvm.ptr
+ %113 = llvm.getelementptr %94[0, 7, %101, 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %114 = llvm.load %113 : !llvm.ptr -> i64
+ %115 = llvm.getelementptr %94[0, 7, %101, 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %116 = llvm.getelementptr %94[0, 7, %101, 2] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %117 = llvm.getelementptr %94[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %118 = llvm.load %117 : !llvm.ptr -> !llvm.ptr
+ %119 = llvm.sub %99, %114 : i64
+ %120 = llvm.getelementptr %118[%119] : (!llvm.ptr, i64) -> !llvm.ptr, i32
+ llvm.store %91, %120 : i32, !llvm.ptr
+ %121 = llvm.load %arg2 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ llvm.store %121, %93 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+ %122 = llvm.getelementptr %93[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %123 = llvm.load %122 : !llvm.ptr -> !llvm.ptr
+ %124 = llvm.ptrtoint %123 : !llvm.ptr to i64
+ %125 = llvm.icmp "ne" %124, %98 : i64
+ llvm.cond_br %125, ^bb2(%97, %123 : i1, !llvm.ptr), ^bb1
+ ^bb1: // pred: ^bb0
+ %126 = llvm.mlir.zero : !llvm.ptr
+ %127 = llvm.getelementptr %126[1] : (!llvm.ptr) -> !llvm.ptr, f32
+ %128 = llvm.ptrtoint %127 : !llvm.ptr to i64
+ %129 = llvm.call @malloc(%128) {in_type = f32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = ".auto.alloc"} : (i64) -> !llvm.ptr
+ llvm.br ^bb2(%96, %129 : i1, !llvm.ptr)
+ ^bb2(%130: i1, %131: !llvm.ptr): // 2 preds: ^bb0, ^bb1
+ %132 = llvm.sitofp %91 : i32 to f32
+ llvm.store %132, %131 : f32, !llvm.ptr
+ llvm.cond_br %130, ^bb3, ^bb6
+ ^bb3: // pred: ^bb2
+ llvm.cond_br %125, ^bb4, ^bb5
+ ^bb4: // pred: ^bb3
+ llvm.call @free(%123) : (!llvm.ptr) -> ()
+ llvm.br ^bb5
+ ^bb5: // 2 preds: ^bb3, ^bb4
+ %133 = llvm.mlir.constant(27 : i32) : i32
+ %134 = llvm.mlir.zero : !llvm.ptr
+ %135 = llvm.getelementptr %134[1] : (!llvm.ptr) -> !llvm.ptr, f32
+ %136 = llvm.ptrtoint %135 : !llvm.ptr to i64
+ %137 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %138 = llvm.insertvalue %136, %137[1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %139 = llvm.mlir.constant(20180515 : i32) : i32
+ %140 = llvm.insertvalue %139, %138[2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %141 = llvm.mlir.constant(0 : i32) : i32
+ %142 = llvm.trunc %141 : i32 to i8
+ %143 = llvm.insertvalue %142, %140[3] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %144 = llvm.trunc %133 : i32 to i8
+ %145 = llvm.insertvalue %144, %143[4] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %146 = llvm.mlir.constant(2 : i32) : i32
+ %147 = llvm.trunc %146 : i32 to i8
+ %148 = llvm.insertvalue %147, %145[5] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %149 = llvm.insertvalue %142, %148[6] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %150 = llvm.insertvalue %131, %149[0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ llvm.store %150, %92 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+ %151 = llvm.load %92 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ llvm.store %151, %arg2 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+ llvm.br ^bb6
+ ^bb6: // 2 preds: ^bb2, ^bb5
+ omp.terminator
+ }
+ llvm.return
+ }
+ llvm.mlir.global internal @_QFEfull_arr() {addr_space = 0 : i32} : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {
+ %0 = llvm.mlir.constant(0 : index) : i64
+ %1 = llvm.mlir.zero : !llvm.ptr
+ %2 = llvm.mlir.constant(27 : i32) : i32
+ %3 = llvm.getelementptr %1[1] : (!llvm.ptr) -> !llvm.ptr, f32
+ %4 = llvm.ptrtoint %3 : !llvm.ptr to i64
+ %5 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %6 = llvm.insertvalue %4, %5[1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %7 = llvm.mlir.constant(20180515 : i32) : i32
+ %8 = llvm.insertvalue %7, %6[2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %9 = llvm.mlir.constant(1 : i32) : i32
+ %10 = llvm.trunc %9 : i32 to i8
+ %11 = llvm.insertvalue %10, %8[3] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %12 = llvm.trunc %2 : i32 to i8
+ %13 = llvm.insertvalue %12, %11[4] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %14 = llvm.mlir.constant(2 : i32) : i32
+ %15 = llvm.trunc %14 : i32 to i8
+ %16 = llvm.insertvalue %15, %13[5] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %17 = llvm.mlir.constant(0 : i32) : i32
+ %18 = llvm.trunc %17 : i32 to i8
+ %19 = llvm.insertvalue %18, %16[6] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %20 = llvm.mlir.constant(1 : i64) : i64
+ %21 = llvm.insertvalue %20, %19[7, 0, 0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %22 = llvm.insertvalue %0, %21[7, 0, 1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %23 = llvm.insertvalue %4, %22[7, 0, 2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %24 = llvm.insertvalue %1, %23[0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ llvm.return %24 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ }
+ llvm.mlir.global internal @_QFEsect_arr() {addr_space = 0 : i32} : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {
+ %0 = llvm.mlir.constant(0 : index) : i64
+ %1 = llvm.mlir.zero : !llvm.ptr
+ %2 = llvm.mlir.constant(9 : i32) : i32
+ %3 = llvm.getelementptr %1[1] : (!llvm.ptr) -> !llvm.ptr, i32
+ %4 = llvm.ptrtoint %3 : !llvm.ptr to i64
+ %5 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %6 = llvm.insertvalue %4, %5[1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %7 = llvm.mlir.constant(20180515 : i32) : i32
+ %8 = llvm.insertvalue %7, %6[2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %9 = llvm.mlir.constant(1 : i32) : i32
+ %10 = llvm.trunc %9 : i32 to i8
+ %11 = llvm.insertvalue %10, %8[3] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %12 = llvm.trunc %2 : i32 to i8
+ %13 = llvm.insertvalue %12, %11[4] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %14 = llvm.mlir.constant(2 : i32) : i32
+ %15 = llvm.trunc %14 : i32 to i8
+ %16 = llvm.insertvalue %15, %13[5] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %17 = llvm.mlir.constant(0 : i32) : i32
+ %18 = llvm.trunc %17 : i32 to i8
+ %19 = llvm.insertvalue %18, %16[6] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %20 = llvm.mlir.constant(1 : i64) : i64
+ %21 = llvm.insertvalue %20, %19[7, 0, 0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %22 = llvm.insertvalue %0, %21[7, 0, 1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %23 = llvm.insertvalue %4, %22[7, 0, 2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %24 = llvm.insertvalue %1, %23[0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ llvm.return %24 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ }
+ llvm.mlir.global linkonce constant @_QQcl.6606f061cfd55e600f1165aa97a647a5() comdat(@__llvm_comdat::@_QQcl.6606f061cfd55e600f1165aa97a647a5) {addr_space = 0 : i32} : !llvm.array<97 x i8> {
+ %0 = llvm.mlir.constant("/home/agozillo/git/flang-dev/work-dir/declare-target-map/omptarget-fortran-allocatables-test.f90\00") : !llvm.array<97 x i8>
+ llvm.return %0 : !llvm.array<97 x i8>
+ }
+ llvm.comdat @__llvm_comdat {
+ llvm.comdat_selector @_QQcl.6606f061cfd55e600f1165aa97a647a5 any
+ }
+ llvm.mlir.global external constant @_QQEnvironmentDefaults() {addr_space = 0 : i32} : !llvm.ptr {
+ %0 = llvm.mlir.zero : !llvm.ptr
+ llvm.return %0 : !llvm.ptr
+ }
+}
+
+// CHECK: @.offload_sizes = private unnamed_addr constant [9 x i64] [i64 0, i64 48, i64 0, i64 0, i64 48, i64 0, i64 0, i64 24, i64 0]
+// CHECK: @.offload_maptypes = private unnamed_addr constant [9 x i64] [i64 32, i64 281474976710657, i64 281474976710675, i64 32, i64 1125899906842625, i64 1125899906842643, i64 32, i64 1970324836974593, i64 1970324836974611]
+// CHECK: @.offload_mapnames = private constant [9 x ptr] [ptr @0, ptr @0, ptr @0, ptr @1, ptr @1, ptr @1, ptr @2, ptr @2, ptr @2]
+
+// CHECK: define void @_QQmain()
+
+// CHECK: %[[ARR_SECT_ALLOCA:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8
+// CHECK: %[[FULL_ARR_ALLOCA:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8
+// CHECK: %[[SCALAR_ALLOCA:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, i64 1, align 8
+
+// CHECK: %[[FULL_ARR_UB:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[FULL_ARR_ALLOCA]], i32 0, i32 7, i64 0, i32 1
+// CHECK: %[[LFULL_ARR_UB:.*]] = load i64, ptr %[[FULL_ARR_UB]], align 4
+// CHECK: %[[FULL_ARR_UB2:.*]] = sub i64 %[[LFULL_ARR_UB]], 1
+
+// CHECK: %[[ARR_SECT_LB:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[ARR_SECT_ALLOCA]], i32 0, i32 7, i64 0, i32 0
+// CHECK: %[[LARR_SECT_LB:.*]] = load i64, ptr %[[ARR_SECT_LB]], align 4
+// CHECK: %[[ARR_SECT_BOUNDS:.*]] = sub i64 2, %[[LARR_SECT_LB]]
+// CHECK: %[[ARR_SECT_BOUNDS2:.*]] = sub i64 5, %[[LARR_SECT_LB]]
+
+// CHECK: %[[FULL_ARR_UB3:.*]] = sub i64 %[[FULL_ARR_UB2]], 0
+// CHECK: %[[FULL_ARR_UB4:.*]] = add i64 %[[FULL_ARR_UB3]], 1
+// CHECK: %[[FULL_ARR_UB5:.*]] = mul i64 1, %[[FULL_ARR_UB4]]
+// CHECK: %[[FULL_ARR_TYPE_SIZE:.*]] = load i64, ptr getelementptr inbounds ({ ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr @_QFEfull_arr, i64 0, i32 1), align 4
+// CHECK: %[[FULL_ARR_SIZE:.*]] = mul i64 %[[FULL_ARR_UB5]], %[[FULL_ARR_TYPE_SIZE]]
+
+// CHECK: %[[ARR_SECT_RANGE:.*]] = sub i64 %[[ARR_SECT_BOUNDS2]], %[[ARR_SECT_BOUNDS]]
+// CHECK: %[[ARR_SECT_ELEMENTS:.*]] = add i64 %[[ARR_SECT_RANGE]], 1
+// CHECK: %[[ARR_SECT_ELEMENTS2:.*]] = mul i64 1, %[[ARR_SECT_ELEMENTS]]
+// CHECK: %[[ARR_SECT_TYPE_SIZE:.*]] = load i64, ptr getelementptr inbounds ({ ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr @_QFEsect_arr, i64 0, i32 1), align 4
+// CHECK: %[[ARR_SECT_SIZE:.*]] = mul i64 %[[ARR_SECT_ELEMENTS2]], %[[ARR_SECT_TYPE_SIZE]]
+
+// CHECK: %[[SCALAR_TYPE_SIZE:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[SCALAR_ALLOCA]], i64 0, i32 1
+// CHECK: %[[SCALAR_SIZE:.*]] = load i64, ptr %[[SCALAR_TYPE_SIZE]], align 4
+
+// CHECK: %[[FULL_ARR_DESC_SIZE:.*]] = sdiv exact i64 sub (i64 ptrtoint (ptr getelementptr inbounds ({ ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr @_QFEfull_arr, i32 1) to i64), i64 ptrtoint (ptr @_QFEfull_arr to i64)), ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK: %[[FULL_ARR_ELE_SZ:.*]] = load i64, ptr getelementptr inbounds ({ ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr @_QFEfull_arr, i64 0, i32 1), align 4
+// CHECK: %[[FULL_ARR_OFFSET:.*]] = mul i64 0, %[[FULL_ARR_ELE_SZ]]
+// CHECK: %[[LFULL_ARR:.*]] = load ptr, ptr @_QFEfull_arr, align 8
+// CHECK: %[[FULL_ARR_PTR:.*]] = getelementptr i8, ptr %[[LFULL_ARR]], i64 %[[FULL_ARR_OFFSET]]
+
+// CHECK: %[[ARR_SECT_DESC_SIZE:.*]] = sdiv exact i64 sub (i64 ptrtoint (ptr getelementptr inbounds ({ ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr @_QFEsect_arr, i32 1) to i64), i64 ptrtoint (ptr @_QFEsect_arr to i64)), ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK: %[[ARR_SECT_ELE_SZ:.*]] = load i64, ptr getelementptr inbounds ({ ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr @_QFEsect_arr, i64 0, i32 1), align 4
+// CHECK: %[[ARR_SECT_OFFSET:.*]] = mul i64 %[[ARR_SECT_BOUNDS]], %[[ARR_SECT_ELE_SZ]]
+// CHECK: %[[LARR_SECT:.*]] = load ptr, ptr @_QFEsect_arr, align 8
+// CHECK: %[[ARR_SECT_PTR:.*]] = getelementptr i8, ptr %[[LARR_SECT]], i64 %[[ARR_SECT_OFFSET]]
+
+// CHECK: %[[SCALAR_DESC_CALC1:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[SCALAR_ALLOCA]], i32 1
+// CHECK: %[[SCALAR_DESC_CALC2:.*]] = ptrtoint ptr %[[SCALAR_DESC_CALC1]] to i64
+// CHECK: %[[SCALAR_DESC_CALC3:.*]] = ptrtoint ptr %[[SCALAR_ALLOCA]] to i64
+// CHECK: %[[SCALAR_DESC_CALC4:.*]] = sub i64 %[[SCALAR_DESC_CALC2]], %[[SCALAR_DESC_CALC3]]
+// CHECK: %[[SCALAR_DESC_SZ:.*]] = sdiv exact i64 %[[SCALAR_DESC_CALC4]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK: %[[SCALAR_BASE:.*]] = getelementptr inbounds { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[SCALAR_ALLOCA]], i64 0, i32 0
+// CHECK: %[[LSCALAR_BASE:.*]] = load ptr, ptr %[[SCALAR_BASE]], align 8
+// CHECK: %[[SCALAR_PTR:.*]] = getelementptr i8, ptr %[[LSCALAR_BASE]], i64 0
+
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK: store ptr @_QFEfull_arr, ptr %[[OFFLOADBASEPTRS]], align 8
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK: store ptr @_QFEfull_arr, ptr %[[OFFLOADPTRS]], align 8
+// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [9 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK: store i64 %[[FULL_ARR_DESC_SIZE]], ptr %[[OFFLOADSIZES]], align 8
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK: store ptr @_QFEfull_arr, ptr %[[OFFLOADBASEPTRS]], align 8
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK: store ptr @_QFEfull_arr, ptr %[[OFFLOADPTRS]], align 8
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
+// CHECK: store ptr @_QFEfull_arr, ptr %[[OFFLOADBASEPTRS]], align 8
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 2
+// CHECK: store ptr %[[FULL_ARR_PTR]], ptr %[[OFFLOADPTRS]], align 8
+// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [9 x i64], ptr %.offload_sizes, i32 0, i32 2
+// CHECK: store i64 %[[FULL_ARR_SIZE]], ptr %[[OFFLOADSIZES]], align 8
+
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
+// CHECK: store ptr @_QFEsect_arr, ptr %[[OFFLOADBASEPTRS]], align 8
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 3
+// CHECK: store ptr @_QFEsect_arr, ptr %[[OFFLOADPTRS]], align 8
+// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [9 x i64], ptr %.offload_sizes, i32 0, i32 3
+// CHECK: store i64 %[[ARR_SECT_DESC_SIZE]], ptr %[[OFFLOADSIZES]], align 8
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 4
+// CHECK: store ptr @_QFEsect_arr, ptr %[[OFFLOADBASEPTRS]], align 8
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 4
+// CHECK: store ptr @_QFEsect_arr, ptr %[[OFFLOADPTRS]], align 8
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 5
+// CHECK: store ptr @_QFEsect_arr, ptr %[[OFFLOADBASEPTRS]], align 8
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 5
+// CHECK: store ptr %[[ARR_SECT_PTR]], ptr %[[OFFLOADPTRS]], align 8
+// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [9 x i64], ptr %.offload_sizes, i32 0, i32 5
+// CHECK: store i64 %[[ARR_SECT_SIZE]], ptr %[[OFFLOADSIZES]], align 8
+
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 6
+// CHECK: store ptr %[[SCALAR_ALLOCA]], ptr %[[OFFLOADBASEPTRS]], align 8
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 6
+// CHECK: store ptr %[[SCALAR_ALLOCA]], ptr %[[OFFLOADPTRS]], align 8
+// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [9 x i64], ptr %.offload_sizes, i32 0, i32 6
+// CHECK: store i64 %[[SCALAR_DESC_SZ]], ptr %[[OFFLOADSIZES]], align 8
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 7
+// CHECK: store ptr %[[SCALAR_ALLOCA]], ptr %[[OFFLOADBASEPTRS]], align 8
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 7
+// CHECK: store ptr %[[SCALAR_ALLOCA]], ptr %[[OFFLOADPTRS]], align 8
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 8
+// CHECK: store ptr %[[SCALAR_BASE]], ptr %[[OFFLOADBASEPTRS]], align 8
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 8
+// CHECK: store ptr %[[SCALAR_PTR]], ptr %[[OFFLOADPTRS]], align 8
+// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [9 x i64], ptr %.offload_sizes, i32 0, i32 8
+// CHECK: store i64 %[[SCALAR_SIZE]], ptr %[[OFFLOADSIZES]], align 8
>From 8724ae4832482be41e59c7c07e2420731413fc0e Mon Sep 17 00:00:00 2001
From: Andrew Gozillon <Andrew.Gozillon at amd.com>
Date: Wed, 8 Nov 2023 21:38:27 -0600
Subject: [PATCH 5/6] [OpenMP] New Fortran OpenMP offloading tests enabled by
allocatables patch series and prior work
This adds several new end to end runtime tests for Flang-new that
should work after the allocatables patch series lands.
---
.../allocatable-array-section-1d-bounds.f90 | 46 ++++++++++
.../allocatable-array-section-3d-bounds.f90 | 44 ++++++++++
.../fortran/allocatable-map-scopes.f95 | 66 +++++++++++++++
.../fortran/pointer-scopes-enter-exit-map.f90 | 83 +++++++++++++++++++
...pointer-target-array-section-3d-bounds.f90 | 43 ++++++++++
.../fortran/pointer-target-map-scopes.f95 | 64 ++++++++++++++
.../target_enter_exit_allocatables.f90 | 44 ++++++++++
.../fortran/target_enter_exit_array.f90 | 41 +++++++++
.../fortran/target_single_value_allocate.f90 | 26 ++++++
9 files changed, 457 insertions(+)
create mode 100644 openmp/libomptarget/test/offloading/fortran/allocatable-array-section-1d-bounds.f90
create mode 100644 openmp/libomptarget/test/offloading/fortran/allocatable-array-section-3d-bounds.f90
create mode 100644 openmp/libomptarget/test/offloading/fortran/allocatable-map-scopes.f95
create mode 100644 openmp/libomptarget/test/offloading/fortran/pointer-scopes-enter-exit-map.f90
create mode 100644 openmp/libomptarget/test/offloading/fortran/pointer-target-array-section-3d-bounds.f90
create mode 100644 openmp/libomptarget/test/offloading/fortran/pointer-target-map-scopes.f95
create mode 100644 openmp/libomptarget/test/offloading/fortran/target_enter_exit_allocatables.f90
create mode 100644 openmp/libomptarget/test/offloading/fortran/target_enter_exit_array.f90
create mode 100644 openmp/libomptarget/test/offloading/fortran/target_single_value_allocate.f90
diff --git a/openmp/libomptarget/test/offloading/fortran/allocatable-array-section-1d-bounds.f90 b/openmp/libomptarget/test/offloading/fortran/allocatable-array-section-1d-bounds.f90
new file mode 100644
index 000000000000000..99dbe99d40497c7
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/fortran/allocatable-array-section-1d-bounds.f90
@@ -0,0 +1,46 @@
+! Offloading test checking interaction of a
+! two 1-D allocatable arrays with a target region
+! while providing the map upper and lower bounds
+! REQUIRES: flang, amdgcn-amd-amdhsa
+! UNSUPPORTED: nvptx64-nvidia-cuda
+! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+! UNSUPPORTED: aarch64-unknown-linux-gnu
+! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+! UNSUPPORTED: x86_64-pc-linux-gnu
+! UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+program main
+ integer, allocatable :: sp_read(:), sp_write(:)
+ allocate(sp_read(10))
+ allocate(sp_write(10))
+
+ do i = 1, 10
+ sp_read(i) = i
+ sp_write(i) = 0
+ end do
+
+ !$omp target map(tofrom:sp_read(2:6)) map(tofrom:sp_write(2:6))
+ do i = 1, 10
+ sp_write(i) = sp_read(i)
+ end do
+ !$omp end target
+
+ do i = 1, 10
+ print *, sp_write(i)
+ end do
+
+ deallocate(sp_read)
+ deallocate(sp_write)
+end program
+
+! CHECK: 0
+! CHECK: 2
+! CHECK: 3
+! CHECK: 4
+! CHECK: 5
+! CHECK: 6
+! CHECK: 0
+! CHECK: 0
+! CHECK: 0
+! CHECK: 0
diff --git a/openmp/libomptarget/test/offloading/fortran/allocatable-array-section-3d-bounds.f90 b/openmp/libomptarget/test/offloading/fortran/allocatable-array-section-3d-bounds.f90
new file mode 100644
index 000000000000000..0786e0fd744e786
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/fortran/allocatable-array-section-3d-bounds.f90
@@ -0,0 +1,44 @@
+! Offloading test checking interaction of allocatables
+! with multi-dimensional bounds (3-D in this case) and
+! a target region
+! REQUIRES: flang, amdgcn-amd-amdhsa
+! UNSUPPORTED: nvptx64-nvidia-cuda
+! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+! UNSUPPORTED: aarch64-unknown-linux-gnu
+! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+! UNSUPPORTED: x86_64-pc-linux-gnu
+! UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+program main
+ integer, allocatable :: inArray(:,:,:)
+ integer, allocatable :: outArray(:,:,:)
+
+ allocate(inArray(3,3,3))
+ allocate(outArray(3,3,3))
+
+ do i = 1, 3
+ do j = 1, 3
+ do k = 1, 3
+ inArray(i, j, k) = 42
+ outArray(i, j, k) = 0
+ end do
+ end do
+ end do
+
+!$omp target map(tofrom:inArray(1:3, 1:3, 2:2), outArray(1:3, 1:3, 1:3))
+ do j = 1, 3
+ do k = 1, 3
+ outArray(k, j, 2) = inArray(k, j, 2)
+ end do
+ end do
+!$omp end target
+
+print *, outArray
+
+deallocate(inArray)
+deallocate(outArray)
+
+end program
+
+! CHECK: 0 0 0 0 0 0 0 0 0 42 42 42 42 42 42 42 42 42 0 0 0 0 0 0 0 0 0
diff --git a/openmp/libomptarget/test/offloading/fortran/allocatable-map-scopes.f95 b/openmp/libomptarget/test/offloading/fortran/allocatable-map-scopes.f95
new file mode 100644
index 000000000000000..bb47d3de96d2a9b
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/fortran/allocatable-map-scopes.f95
@@ -0,0 +1,66 @@
+! Offloading test checking interaction of allocatables
+! with target in different scopes
+! REQUIRES: flang, amdgcn-amd-amdhsa
+! UNSUPPORTED: nvptx64-nvidia-cuda
+! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+! UNSUPPORTED: aarch64-unknown-linux-gnu
+! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+! UNSUPPORTED: x86_64-pc-linux-gnu
+! UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+module test
+ contains
+ subroutine func_arg(arg_alloc)
+ integer, allocatable, intent (inout) :: arg_alloc(:)
+
+ !$omp target map(tofrom: arg_alloc)
+ do index = 1, 10
+ arg_alloc(index) = arg_alloc(index) + index
+ end do
+ !$omp end target
+
+ print *, arg_alloc
+ end subroutine func_arg
+end module
+
+subroutine func
+ integer, allocatable :: local_alloc(:)
+ allocate(local_alloc(10))
+
+ !$omp target map(tofrom: local_alloc)
+ do index = 1, 10
+ local_alloc(index) = index
+ end do
+ !$omp end target
+
+ print *, local_alloc
+
+ deallocate(local_alloc)
+end subroutine func
+
+
+program main
+ use test
+ integer, allocatable :: map_ptr(:)
+
+ allocate(map_ptr(10))
+
+ !$omp target map(tofrom: map_ptr)
+ do index = 1, 10
+ map_ptr(index) = index
+ end do
+ !$omp end target
+
+ call func
+
+ print *, map_ptr
+
+ call func_arg(map_ptr)
+
+ deallocate(map_ptr)
+end program
+
+! CHECK: 1 2 3 4 5 6 7 8 9 10
+! CHECK: 1 2 3 4 5 6 7 8 9 10
+! CHECK: 2 4 6 8 10 12 14 16 18 20
diff --git a/openmp/libomptarget/test/offloading/fortran/pointer-scopes-enter-exit-map.f90 b/openmp/libomptarget/test/offloading/fortran/pointer-scopes-enter-exit-map.f90
new file mode 100644
index 000000000000000..dee75af06927bee
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/fortran/pointer-scopes-enter-exit-map.f90
@@ -0,0 +1,83 @@
+! Offloading test checking interaction of pointers
+! with target in different scopes
+! REQUIRES: flang, amdgcn-amd-amdhsa
+! UNSUPPORTED: nvptx64-nvidia-cuda
+! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+! UNSUPPORTED: aarch64-unknown-linux-gnu
+! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+! UNSUPPORTED: x86_64-pc-linux-gnu
+! UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+module test
+ contains
+ subroutine func_arg(arg_alloc)
+ integer, pointer, intent (inout) :: arg_alloc(:)
+
+ !$omp target enter data map(alloc: arg_alloc)
+
+ !$omp target
+ do index = 1, 10
+ arg_alloc(index) = arg_alloc(index) + index
+ end do
+ !$omp end target
+
+ !$omp target exit data map(from: arg_alloc)
+
+ !$omp target exit data map(delete: arg_alloc)
+
+ print *, arg_alloc
+ end subroutine func_arg
+end module
+
+subroutine func
+ integer, pointer :: local_alloc(:)
+ allocate(local_alloc(10))
+
+ !$omp target enter data map(alloc: local_alloc)
+
+ !$omp target
+ do index = 1, 10
+ local_alloc(index) = index
+ end do
+ !$omp end target
+
+ !$omp target exit data map(from: local_alloc)
+
+ !$omp target exit data map(delete: local_alloc)
+
+ print *, local_alloc
+
+ deallocate(local_alloc)
+end subroutine func
+
+
+program main
+ use test
+ integer, pointer :: map_ptr(:)
+ allocate(map_ptr(10))
+
+ !$omp target enter data map(alloc: map_ptr)
+
+ !$omp target
+ do index = 1, 10
+ map_ptr(index) = index
+ end do
+ !$omp end target
+
+ !$omp target exit data map(from: map_ptr)
+
+ !$omp target exit data map(delete: map_ptr)
+
+ call func
+
+ print *, map_ptr
+
+ call func_arg(map_ptr)
+
+ deallocate(map_ptr)
+end program
+
+! CHECK: 1 2 3 4 5 6 7 8 9 10
+! CHECK: 1 2 3 4 5 6 7 8 9 10
+! CHECK: 2 4 6 8 10 12 14 16 18 20
diff --git a/openmp/libomptarget/test/offloading/fortran/pointer-target-array-section-3d-bounds.f90 b/openmp/libomptarget/test/offloading/fortran/pointer-target-array-section-3d-bounds.f90
new file mode 100644
index 000000000000000..ff2298cf5dbc9dc
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/fortran/pointer-target-array-section-3d-bounds.f90
@@ -0,0 +1,43 @@
+! Offloading test checking interaction of pointer
+! and target with target where 3-D bounds have
+! been specified
+! REQUIRES: flang, amdgcn-amd-amdhsa
+! UNSUPPORTED: nvptx64-nvidia-cuda
+! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+! UNSUPPORTED: aarch64-unknown-linux-gnu
+! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+! UNSUPPORTED: x86_64-pc-linux-gnu
+! UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+program main
+ integer, pointer :: inArray(:,:,:)
+ integer, pointer :: outArray(:,:,:)
+ integer, target :: in(3,3,3)
+ integer, target :: out(3,3,3)
+
+ inArray => in
+ outArray => out
+
+ do i = 1, 3
+ do j = 1, 3
+ do k = 1, 3
+ inArray(i, j, k) = 42
+ outArray(i, j, k) = 0
+ end do
+ end do
+ end do
+
+!$omp target map(tofrom:inArray(1:3, 1:3, 2:2), outArray(1:3, 1:3, 1:3))
+ do j = 1, 3
+ do k = 1, 3
+ outArray(k, j, 2) = inArray(k, j, 2)
+ end do
+ end do
+!$omp end target
+
+ print *, outArray
+
+end program
+
+! CHECK: 0 0 0 0 0 0 0 0 0 42 42 42 42 42 42 42 42 42 0 0 0 0 0 0 0 0 0
diff --git a/openmp/libomptarget/test/offloading/fortran/pointer-target-map-scopes.f95 b/openmp/libomptarget/test/offloading/fortran/pointer-target-map-scopes.f95
new file mode 100644
index 000000000000000..d9a7000719f0e85
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/fortran/pointer-target-map-scopes.f95
@@ -0,0 +1,64 @@
+! Offloading test checking interaction of pointer
+! and target with target across multiple scopes
+! REQUIRES: flang, amdgcn-amd-amdhsa
+! UNSUPPORTED: nvptx64-nvidia-cuda
+! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+! UNSUPPORTED: aarch64-unknown-linux-gnu
+! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+! UNSUPPORTED: x86_64-pc-linux-gnu
+! UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+module test
+ contains
+ subroutine func_arg(arg_alloc)
+ integer, pointer, intent (inout) :: arg_alloc(:)
+
+ !$omp target map(tofrom: arg_alloc)
+ do index = 1, 10
+ arg_alloc(index) = arg_alloc(index) + index
+ end do
+ !$omp end target
+
+ print *, arg_alloc
+ end subroutine func_arg
+end module
+
+subroutine func
+ integer, pointer :: local_alloc(:)
+ integer, target :: b(10)
+ local_alloc => b
+
+ !$omp target map(tofrom: local_alloc)
+ do index = 1, 10
+ local_alloc(index) = index
+ end do
+ !$omp end target
+
+ print *, local_alloc
+ end subroutine func
+
+
+ program main
+ use test
+ integer, pointer :: map_ptr(:)
+ integer, target :: b(10)
+
+ map_ptr => b
+
+ !$omp target map(tofrom: map_ptr)
+ do index = 1, 10
+ map_ptr(index) = index
+ end do
+ !$omp end target
+
+ call func
+
+ print *, map_ptr
+
+ call func_arg(map_ptr)
+end program
+
+!CHECK: 1 2 3 4 5 6 7 8 9 10
+!CHECK: 1 2 3 4 5 6 7 8 9 10
+!CHECK: 2 4 6 8 10 12 14 16 18 20
diff --git a/openmp/libomptarget/test/offloading/fortran/target_enter_exit_allocatables.f90 b/openmp/libomptarget/test/offloading/fortran/target_enter_exit_allocatables.f90
new file mode 100644
index 000000000000000..865be95ba9682d3
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/fortran/target_enter_exit_allocatables.f90
@@ -0,0 +1,44 @@
+! Offloading test checking interaction of allocatables
+! with enter, exit and target
+! REQUIRES: flang, amdgcn-amd-amdhsa
+! UNSUPPORTED: nvptx64-nvidia-cuda
+! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+! UNSUPPORTED: aarch64-unknown-linux-gnu
+! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+! UNSUPPORTED: x86_64-pc-linux-gnu
+! UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+program main
+ integer, allocatable :: A(:)
+ allocate(A(10))
+
+ !$omp target enter data map(alloc: A)
+
+ !$omp target
+ do I = 1, 10
+ A(I) = I
+ end do
+ !$omp end target
+
+ !$omp target exit data map(from: A)
+
+ !$omp target exit data map(delete: A)
+
+ do i = 1, 10
+ print *, A(i)
+ end do
+
+ deallocate(A)
+end program
+
+! CHECK: 1
+! CHECK: 2
+! CHECK: 3
+! CHECK: 4
+! CHECK: 5
+! CHECK: 6
+! CHECK: 7
+! CHECK: 8
+! CHECK: 9
+! CHECK: 10
diff --git a/openmp/libomptarget/test/offloading/fortran/target_enter_exit_array.f90 b/openmp/libomptarget/test/offloading/fortran/target_enter_exit_array.f90
new file mode 100644
index 000000000000000..4a9fb6ee177f654
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/fortran/target_enter_exit_array.f90
@@ -0,0 +1,41 @@
+! Offloading test checking interaction of fixed size
+! arrays with enter, exit and target
+! REQUIRES: flang, amdgcn-amd-amdhsa
+! UNSUPPORTED: nvptx64-nvidia-cuda
+! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+! UNSUPPORTED: aarch64-unknown-linux-gnu
+! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+! UNSUPPORTED: x86_64-pc-linux-gnu
+! UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+program main
+ integer :: A(10)
+
+ !$omp target enter data map(alloc: A)
+
+ !$omp target
+ do I = 1, 10
+ A(I) = I
+ end do
+ !$omp end target
+
+ !$omp target exit data map(from: A)
+
+ !$omp target exit data map(delete: A)
+
+ do i = 1, 10
+ print *, A(i)
+ end do
+end program
+
+! CHECK: 1
+! CHECK: 2
+! CHECK: 3
+! CHECK: 4
+! CHECK: 5
+! CHECK: 6
+! CHECK: 7
+! CHECK: 8
+! CHECK: 9
+! CHECK: 10
diff --git a/openmp/libomptarget/test/offloading/fortran/target_single_value_allocate.f90 b/openmp/libomptarget/test/offloading/fortran/target_single_value_allocate.f90
new file mode 100644
index 000000000000000..9330e0094495246
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/fortran/target_single_value_allocate.f90
@@ -0,0 +1,26 @@
+! Offloading test checking interaction of a
+! non-array allocatable with a target region
+! REQUIRES: flang, amdgcn-amd-amdhsa
+! UNSUPPORTED: nvptx64-nvidia-cuda
+! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+! UNSUPPORTED: aarch64-unknown-linux-gnu
+! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+! UNSUPPORTED: x86_64-pc-linux-gnu
+! UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+program main
+ integer, allocatable :: test
+ allocate(test)
+ test = 10
+
+!$omp target map(tofrom:test)
+ test = 50
+!$omp end target
+
+ print *, test
+
+ deallocate(test)
+end program
+
+! CHECK: 50
>From 7c75410631bf1dce7a2dabf3420a724560624080 Mon Sep 17 00:00:00 2001
From: Andrew Gozillon <Andrew.Gozillon at amd.com>
Date: Fri, 10 Nov 2023 20:16:12 -0600
Subject: [PATCH 6/6] Update based on reviewer feedback
-> change BoolAttr to UnitAttr
-> remove unrequired getLLVMIRType function
-> remove OpenMP semantic addition for getting access to struct
-> Move FirOpConversion for MapInfoOp into a separate file and try pass method
-> fix all tests
---
.../flang/Optimizer/CodeGen/CGPasses.td | 9 +++
.../include/flang/Optimizer/CodeGen/CodeGen.h | 6 ++
flang/lib/Lower/OpenMP.cpp | 8 +--
flang/lib/Optimizer/CodeGen/CMakeLists.txt | 1 +
flang/lib/Optimizer/CodeGen/CodeGen.cpp | 54 +++-------------
flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp | 62 +++++++++++++++++++
flang/test/Lower/OpenMP/FIR/target.f90 | 2 +-
.../Lower/OpenMP/allocatable-array-bounds.f90 | 6 +-
flang/test/Lower/OpenMP/allocatable-map.f90 | 4 +-
flang/test/Lower/OpenMP/target.f90 | 2 +-
mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 2 +-
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 22 +------
mlir/test/Dialect/OpenMP/ops.mlir | 6 +-
...target-fortran-allocatable-types-host.mlir | 6 +-
14 files changed, 106 insertions(+), 84 deletions(-)
create mode 100644 flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp
diff --git a/flang/include/flang/Optimizer/CodeGen/CGPasses.td b/flang/include/flang/Optimizer/CodeGen/CGPasses.td
index 0014298a27a2252..ae17ded0669c910 100644
--- a/flang/include/flang/Optimizer/CodeGen/CGPasses.td
+++ b/flang/include/flang/Optimizer/CodeGen/CGPasses.td
@@ -76,4 +76,13 @@ def BoxedProcedurePass : Pass<"boxed-procedure", "mlir::ModuleOp"> {
];
}
+def OpenMPFIRConversionsToLLVM : Pass<"openmp-fir-conversions-to-llvm", "mlir::ModuleOp"> {
+ let description = [{
+ Conversion pass for specialised handling of HLFIR/FIR types or operations
+ contained within OpenMP dialect operations when converting to the LLVM-IR dialect.
+ }];
+ let constructor = "::fir::createOpenMPFIRConversionsToLLVMPass()";
+ let dependentDialects = ["mlir::LLVM::LLVMDialect", "mlir::omp::OpenMPDialect"];
+}
+
#endif // FORTRAN_OPTIMIZER_CODEGEN_FIR_PASSES
diff --git a/flang/include/flang/Optimizer/CodeGen/CodeGen.h b/flang/include/flang/Optimizer/CodeGen/CodeGen.h
index 7d8e548d89a18b5..2683f607b4ed233 100644
--- a/flang/include/flang/Optimizer/CodeGen/CodeGen.h
+++ b/flang/include/flang/Optimizer/CodeGen/CodeGen.h
@@ -24,6 +24,7 @@ struct NameUniquer;
#define GEN_PASS_DECL_CODEGENREWRITE
#define GEN_PASS_DECL_TARGETREWRITEPASS
#define GEN_PASS_DECL_BOXEDPROCEDUREPASS
+#define GEN_PASS_DECL_OPENMPFIRCONVERSIONSTOLLVM
#include "flang/Optimizer/CodeGen/CGPasses.h.inc"
/// Prerequiste pass for code gen. Perform intermediate rewrites to perform
@@ -79,6 +80,11 @@ std::unique_ptr<mlir::Pass> createLLVMDialectToLLVMPass(
std::unique_ptr<mlir::Pass> createBoxedProcedurePass();
std::unique_ptr<mlir::Pass> createBoxedProcedurePass(bool useThunks);
+/// Specialised conversion of OpenMP operations containing FIR to LLVM dialect,
+/// utilised in cases where the default OpenMP dialect handling does not cover
+/// all cases.
+std::unique_ptr<mlir::Pass> createOpenMPFIRConversionsToLLVMPass();
+
// declarative passes
#define GEN_PASS_REGISTRATION
#include "flang/Optimizer/CodeGen/CGPasses.h.inc"
diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp
index 45001b4e4b0c104..fc397b0e0f69290 100644
--- a/flang/lib/Lower/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP.cpp
@@ -54,9 +54,6 @@ getOmpObjectSymbol(const Fortran::parser::OmpObject &ompObject) {
Fortran::parser::Unwrap<Fortran::parser::ArrayElement>(
designator)) {
sym = GetFirstName(arrayEle->base).symbol;
- } else if (auto *structComp = Fortran::parser::Unwrap<
- Fortran::parser::StructureComponent>(designator)) {
- sym = structComp->component.symbol;
} else if (const Fortran::parser::Name *name =
Fortran::semantics::getDesignatorNameIfDataRef(
designator)) {
@@ -1735,9 +1732,8 @@ createMapInfoOp(fir::FirOpBuilder &builder, mlir::Location loc,
builder.getIntegerAttr(builder.getIntegerType(64, false), mapType),
builder.getAttr<mlir::omp::VariableCaptureKindAttr>(mapCaptureType),
builder.getStringAttr(name.str()));
-
- if (isAllocatable)
- op.setIsFortranAllocatableAttr(builder.getBoolAttr(isAllocatable));
+
+ op.setIsFortranAllocatable(isAllocatable);
return op;
}
diff --git a/flang/lib/Optimizer/CodeGen/CMakeLists.txt b/flang/lib/Optimizer/CodeGen/CMakeLists.txt
index 0daa97b00dfa00c..b304b8c950b7f85 100644
--- a/flang/lib/Optimizer/CodeGen/CMakeLists.txt
+++ b/flang/lib/Optimizer/CodeGen/CMakeLists.txt
@@ -1,4 +1,5 @@
add_flang_library(FIRCodeGen
+ CodeGenOpenMP.cpp
BoxedProcedure.cpp
CGOps.cpp
CodeGen.cpp
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index d0ac9ea262be81e..efb5dbc43a08531 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -417,46 +417,6 @@ class FIROpAndTypeConversion : public FIROpConversion<FromOp> {
};
} // namespace
-namespace {
-// FIR Op specific conversion for MapInfoOp that overwrites the default OpenMP
-// Dialect lowering, this allows FIR specific lowering of types, required for
-// descriptors of allocatables currently.
-struct MapInfoOpConversion : public FIROpConversion<mlir::omp::MapInfoOp> {
- using FIROpConversion::FIROpConversion;
-
- mlir::LogicalResult
- matchAndRewrite(mlir::omp::MapInfoOp curOp, OpAdaptor adaptor,
- mlir::ConversionPatternRewriter &rewriter) const override {
- const mlir::TypeConverter *converter = getTypeConverter();
-
- llvm::SmallVector<mlir::Type> resTypes;
- if (failed(converter->convertTypes(curOp->getResultTypes(), resTypes)))
- return mlir::failure();
-
- // Copy attributes of the curOp except for the typeAttr which should
- // be converted
- llvm::SmallVector<mlir::NamedAttribute> newAttrs;
- for (mlir::NamedAttribute attr : curOp->getAttrs()) {
- if (auto typeAttr = mlir::dyn_cast<mlir::TypeAttr>(attr.getValue())) {
- mlir::Type newAttr;
- if (curOp.getIsFortranAllocatable()) {
- newAttr = this->getBoxTypePair(typeAttr.getValue()).llvm;
- } else {
- newAttr = converter->convertType(typeAttr.getValue());
- }
- newAttrs.emplace_back(attr.getName(), mlir::TypeAttr::get(newAttr));
- } else {
- newAttrs.push_back(attr);
- }
- }
-
- rewriter.replaceOpWithNewOp<mlir::omp::MapInfoOp>(
- curOp, resTypes, adaptor.getOperands(), newAttrs);
- return mlir::success();
- }
-};
-} // namespace
-
namespace {
/// Lower `fir.address_of` operation to `llvm.address_of` operation.
struct AddrOfOpConversion : public FIROpConversion<fir::AddrOfOp> {
@@ -3837,6 +3797,15 @@ class FIRToLLVMLowering
if (mlir::failed(runPipeline(mathConvertionPM, mod)))
return signalPassFailure();
+ // HLFIR/FIR specific processing pass required for
+ // OpenMP dialect operations. Allows access to FIR
+ // machinery not accessible in the OpenMP dialect
+ // rewriters.
+ mlir::OpPassManager openmpConversionPM("builtin.module");
+ openmpConversionPM.addPass(fir::createOpenMPFIRConversionsToLLVMPass());
+ if (mlir::failed(runPipeline(openmpConversionPM, mod)))
+ return signalPassFailure();
+
auto *context = getModule().getContext();
fir::LLVMTypeConverter typeConverter{getModule(),
options.applyTBAA || applyTBAA,
@@ -3868,11 +3837,6 @@ class FIRToLLVMLowering
options);
mlir::populateFuncToLLVMConversionPatterns(typeConverter, pattern);
mlir::populateOpenMPToLLVMConversionPatterns(typeConverter, pattern);
-
- // Insert OpenMP FIR specific conversion patterns that override OpenMP
- // dialect default conversion patterns.
- pattern.insert<MapInfoOpConversion>(typeConverter, options);
-
mlir::arith::populateArithToLLVMConversionPatterns(typeConverter, pattern);
mlir::cf::populateControlFlowToLLVMConversionPatterns(typeConverter,
pattern);
diff --git a/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp b/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp
new file mode 100644
index 000000000000000..ad787fe5700fadb
--- /dev/null
+++ b/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp
@@ -0,0 +1,62 @@
+#include "flang/Optimizer/CodeGen/CodeGen.h"
+
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Builder/LowLevelIntrinsics.h"
+#include "flang/Optimizer/Dialect/FIRDialect.h"
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/Dialect/Support/FIRContext.h"
+#include "flang/Optimizer/Support/FatalError.h"
+#include "flang/Optimizer/Support/InternalNames.h"
+#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
+#include "mlir/Conversion/LLVMCommon/Pattern.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/DialectConversion.h"
+
+
+namespace fir {
+#define GEN_PASS_DEF_OPENMPFIRCONVERSIONSTOLLVM
+#include "flang/Optimizer/CodeGen/CGPasses.h.inc"
+} // namespace fir
+
+using namespace fir;
+
+#define DEBUG_TYPE "flang-codegen-openmp"
+
+// fir::LLVMTypeConverter for converting to LLVM IR dialect types.
+#include "flang/Optimizer/CodeGen/TypeConverter.h"
+
+namespace {
+class OpenMPFIRConversionsToLLVM
+ : public fir::impl::OpenMPFIRConversionsToLLVMBase<OpenMPFIRConversionsToLLVM> {
+public:
+ OpenMPFIRConversionsToLLVM() {}
+
+ inline mlir::ModuleOp getModule() { return getOperation(); }
+
+ void runOnOperation() override final {
+ fir::LLVMTypeConverter typeConverter{getModule(), /*applyTBAA*/ false,
+ /*forceUnifiedTBAATree*/ false};
+ mlir::IRRewriter rewriter(getModule()->getContext());
+ getModule().walk([&](mlir::Operation *op) {
+ // FIR Op specific conversion for MapInfoOps containing BoxTypes that are
+ // descriptors; this allows FIR-specific lowering of types, required for
+ // descriptors of allocatables currently.
+ if (auto mapInfoOp = mlir::dyn_cast<mlir::omp::MapInfoOp>(op)) {
+ if (mapInfoOp.getIsFortranAllocatable() && mapInfoOp.getVarType().has_value()) {
+ mapInfoOp.setVarType(typeConverter.convertBoxTypeAsStruct(
+ mlir::cast<fir::BaseBoxType>(mapInfoOp.getVarType().value())));
+ }
+ }
+ });
+ };
+};
+} // namespace
+
+
+std::unique_ptr<mlir::Pass> fir::createOpenMPFIRConversionsToLLVMPass() {
+ return std::make_unique<OpenMPFIRConversionsToLLVM>();
+}
diff --git a/flang/test/Lower/OpenMP/FIR/target.f90 b/flang/test/Lower/OpenMP/FIR/target.f90
index cd5326f3224e220..07f5363982c29a0 100644
--- a/flang/test/Lower/OpenMP/FIR/target.f90
+++ b/flang/test/Lower/OpenMP/FIR/target.f90
@@ -316,7 +316,7 @@ end subroutine omp_target_device_ptr
subroutine omp_target_device_addr
integer, pointer :: a
!CHECK: %[[VAL_0:.*]] = fir.alloca !fir.box<!fir.ptr<i32>> {bindc_name = "a", uniq_name = "_QFomp_target_device_addrEa"}
- !CHECK: %[[MAP:.*]] = omp.map_info var_ptr({{.*}}) map_clauses(tofrom) capture(ByRef) -> {{.*}} {is_fortran_allocatable = true, name = "a"}
+ !CHECK: %[[MAP:.*]] = omp.map_info var_ptr({{.*}}) map_clauses(tofrom) capture(ByRef) -> {{.*}} {is_fortran_allocatable, name = "a"}
!CHECK: omp.target_data map_entries(%[[MAP]] : {{.*}}) use_device_addr(%[[VAL_0]] : !fir.ref<!fir.box<!fir.ptr<i32>>>) {
!$omp target data map(tofrom: a) use_device_addr(a)
!CHECK: ^bb0(%[[VAL_1:.*]]: !fir.ref<!fir.box<!fir.ptr<i32>>>):
diff --git a/flang/test/Lower/OpenMP/allocatable-array-bounds.f90 b/flang/test/Lower/OpenMP/allocatable-array-bounds.f90
index aa2649632a87270..7cc3bf7d211da06 100644
--- a/flang/test/Lower/OpenMP/allocatable-array-bounds.f90
+++ b/flang/test/Lower/OpenMP/allocatable-array-bounds.f90
@@ -19,7 +19,7 @@
!HOST: %[[CONSTANT_4:.*]] = arith.constant 5 : index
!HOST: %[[UB_1:.*]] = arith.subi %[[CONSTANT_4]], %[[BOX_1]]#0 : index
!HOST: %[[BOUNDS_1:.*]] = omp.bounds lower_bound(%[[LB_1]] : index) upper_bound(%[[UB_1]] : index) stride(%[[BOX_2]]#2 : index) start_idx(%[[BOX_1]]#0 : index) {stride_in_bytes = true}
-!HOST: %[[MAP_INFO_1:.*]] = omp.map_info var_ptr(%[[DECLARE_1]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS_1]]) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {is_fortran_allocatable = true, name = "sp_read(2:5)"}
+!HOST: %[[MAP_INFO_1:.*]] = omp.map_info var_ptr(%[[DECLARE_1]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS_1]]) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {is_fortran_allocatable, name = "sp_read(2:5)"}
!HOST: %[[LOAD_3:.*]] = fir.load %[[DECLARE_2]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
!HOST: %[[LOAD_4:.*]] = fir.load %[[DECLARE_2]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
@@ -32,7 +32,7 @@
!HOST: %[[CONSTANT_8:.*]] = arith.constant 5 : index
!HOST: %[[UB_2:.*]] = arith.subi %[[CONSTANT_8]], %[[BOX_3]]#0 : index
!HOST: %[[BOUNDS_2:.*]] = omp.bounds lower_bound(%[[LB_2]] : index) upper_bound(%[[UB_2]] : index) stride(%[[BOX_4]]#2 : index) start_idx(%[[BOX_3]]#0 : index) {stride_in_bytes = true}
-!HOST: %[[MAP_INFO_2:.*]] = omp.map_info var_ptr(%11#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS_2]]) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {is_fortran_allocatable = true, name = "sp_write(2:5)"}
+!HOST: %[[MAP_INFO_2:.*]] = omp.map_info var_ptr(%11#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS_2]]) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {is_fortran_allocatable, name = "sp_write(2:5)"}
subroutine read_write_section()
integer, allocatable :: sp_read(:)
integer, allocatable :: sp_write(:)
@@ -65,7 +65,7 @@ module assumed_allocatable_array_routines
!HOST: %[[CONSTANT_4:.*]] = arith.constant 5 : index
!HOST: %[[UB:.*]] = arith.subi %[[CONSTANT_4]], %[[BOX_1]]#0 : index
!HOST: %[[BOUNDS:.*]] = omp.bounds lower_bound(%[[LB]] : index) upper_bound(%[[UB]] : index) stride(%[[BOX_2]]#2 : index) start_idx(%[[BOX_1]]#0 : index) {stride_in_bytes = true}
-!HOST: %[[MAP_INFO:.*]] = omp.map_info var_ptr(%[[DECLARE]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {is_fortran_allocatable = true, name = "arr_read_write(2:5)"}
+!HOST: %[[MAP_INFO:.*]] = omp.map_info var_ptr(%[[DECLARE]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {is_fortran_allocatable, name = "arr_read_write(2:5)"}
subroutine assumed_shape_array(arr_read_write)
integer, allocatable, intent(inout) :: arr_read_write(:)
diff --git a/flang/test/Lower/OpenMP/allocatable-map.f90 b/flang/test/Lower/OpenMP/allocatable-map.f90
index 7d10ff181d76e05..4c7f45dfe2f2f56 100644
--- a/flang/test/Lower/OpenMP/allocatable-map.f90
+++ b/flang/test/Lower/OpenMP/allocatable-map.f90
@@ -2,11 +2,11 @@
!RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | fir-opt --fir-to-llvm-ir | FileCheck %s --check-prefixes="LLVMOMPDIALECT"
!LLVMOMPDIALECT: %[[ALLOCA:.*]] = llvm.alloca {{.*}} x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {bindc_name = "point"} : (i64) -> !llvm.ptr
-!LLVMOMPDIALECT: %[[MAP:.*]] = omp.map_info var_ptr(%[[ALLOCA]] : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(implicit, tofrom) capture(ByRef) -> !llvm.ptr {is_fortran_allocatable = true, name = "point"}
+!LLVMOMPDIALECT: %[[MAP:.*]] = omp.map_info var_ptr(%[[ALLOCA]] : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(implicit, tofrom) capture(ByRef) -> !llvm.ptr {is_fortran_allocatable, name = "point"}
!LLVMOMPDIALECT: omp.target map_entries({{.*}}, %[[MAP]] -> {{.*}} : {{.*}}, !llvm.ptr) {
!HLFIRDIALECT: %[[POINTER:.*]]:2 = hlfir.declare %0 {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFpointer_routineEpoint"} : (!fir.ref<!fir.box<!fir.ptr<i32>>>) -> (!fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.ref<!fir.box<!fir.ptr<i32>>>)
-!HLFIRDIALECT: %[[POINTER_MAP:.*]] = omp.map_info var_ptr(%[[POINTER]]#1 : !fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.box<!fir.ptr<i32>>) map_clauses(implicit, tofrom) capture(ByRef) -> !fir.ref<!fir.box<!fir.ptr<i32>>> {is_fortran_allocatable = true, name = "point"}
+!HLFIRDIALECT: %[[POINTER_MAP:.*]] = omp.map_info var_ptr(%[[POINTER]]#1 : !fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.box<!fir.ptr<i32>>) map_clauses(implicit, tofrom) capture(ByRef) -> !fir.ref<!fir.box<!fir.ptr<i32>>> {is_fortran_allocatable, name = "point"}
!HLFIRDIALECT: omp.target map_entries({{.*}}, %[[POINTER_MAP]] -> {{.*}} : {{.*}}, !fir.ref<!fir.box<!fir.ptr<i32>>>) {
subroutine pointer_routine()
integer, pointer :: point
diff --git a/flang/test/Lower/OpenMP/target.f90 b/flang/test/Lower/OpenMP/target.f90
index a409769ae4f8bf4..46de26d71d1bb99 100644
--- a/flang/test/Lower/OpenMP/target.f90
+++ b/flang/test/Lower/OpenMP/target.f90
@@ -340,7 +340,7 @@ subroutine omp_target_device_addr
integer, pointer :: a
!CHECK: %[[VAL_0:.*]] = fir.alloca !fir.box<!fir.ptr<i32>> {bindc_name = "a", uniq_name = "_QFomp_target_device_addrEa"}
!CHECK: %[[VAL_0_DECL:.*]]:2 = hlfir.declare %0 {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFomp_target_device_addrEa"} : (!fir.ref<!fir.box<!fir.ptr<i32>>>) -> (!fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.ref<!fir.box<!fir.ptr<i32>>>)
- !CHECK: %[[MAP:.*]] = omp.map_info var_ptr({{.*}}) map_clauses(tofrom) capture(ByRef) -> {{.*}} {is_fortran_allocatable = true, name = "a"}
+ !CHECK: %[[MAP:.*]] = omp.map_info var_ptr({{.*}}) map_clauses(tofrom) capture(ByRef) -> {{.*}} {is_fortran_allocatable, name = "a"}
!CHECK: omp.target_data map_entries(%[[MAP]] : {{.*}}) use_device_addr(%[[VAL_0_DECL]]#1 : !fir.ref<!fir.box<!fir.ptr<i32>>>) {
!$omp target data map(tofrom: a) use_device_addr(a)
!CHECK: ^bb0(%[[VAL_1:.*]]: !fir.ref<!fir.box<!fir.ptr<i32>>>):
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 9fcbffdb930eb05..e0f395258f9f6a8 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -1152,7 +1152,7 @@ def MapInfoOp : OpenMP_Op<"map_info", [AttrSizedOperandSegments]> {
OptionalAttr<UI64Attr>:$map_type,
OptionalAttr<VariableCaptureKindAttr>:$map_capture_type,
OptionalAttr<StrAttr>:$name,
- DefaultValuedAttr<BoolAttr, "false">:$is_fortran_allocatable);
+ UnitAttr:$is_fortran_allocatable);
let results = (outs AnyType:$omp_ptr);
let description = [{
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index ab57b5a5f5cc83f..93df9ac594b04bf 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1754,21 +1754,6 @@ llvm::Value *getSizeInBytes(DataLayout &dl, const mlir::Type &type,
return builder.getInt64(underlyingTypeSzInBits / 8);
}
-// Get the underlying LLVM type, this will bypass the pointer and
-// access the underlying type. Which bypasses llvm's opaque pointers
-// to get the underlying type via MLIR.
-llvm::Type *getLLVMIRType(mlir::Type inputType,
- LLVM::ModuleTranslation &moduleTranslation) {
- llvm::Type *type = moduleTranslation.convertType(inputType);
- if (auto pointerType =
- llvm::dyn_cast<mlir::omp::PointerLikeType>(inputType)) {
- if (auto eleType = pointerType.getElementType()) {
- type = moduleTranslation.convertType(eleType);
- }
- }
- return type;
-}
-
void collectMapDataFromMapOperands(MapInfoData &mapData,
llvm::SmallVectorImpl<Value> &mapOperands,
LLVM::ModuleTranslation &moduleTranslation,
@@ -1793,10 +1778,9 @@ void collectMapDataFromMapOperands(MapInfoData &mapData,
mapData.BasePointers.push_back(mapData.OriginalValue.back());
}
- mapData.BaseType.push_back(getLLVMIRType(mapOp.getVal()
- ? mapOp.getVal().getType()
- : mapOp.getVarType().value(),
- moduleTranslation));
+ mapData.BaseType.push_back(moduleTranslation.convertType(
+ mapOp.getVal() ? mapOp.getVal().getType()
+ : mapOp.getVarType().value()));
mapData.Sizes.push_back(
getSizeInBytes(dl,
mapOp.getVal() ? mapOp.getVal().getType()
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index 0c2a152845da73b..548a95284f373ea 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -2086,10 +2086,10 @@ func.func @omp_targets_with_map_bounds(%arg0: !llvm.ptr, %arg1: !llvm.ptr) -> ()
// CHECK-LABEL: omp_targets_is_allocatable
// CHECK-SAME: (%[[ARG0:.*]]: !llvm.ptr, %[[ARG1:.*]]: !llvm.ptr)
func.func @omp_targets_is_allocatable(%arg0: !llvm.ptr, %arg1: !llvm.ptr) -> () {
- // CHECK: %[[MAP0:.*]] = omp.map_info var_ptr(%[[ARG0]] : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {is_fortran_allocatable = true, name = ""}
- %mapv1 = omp.map_info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {is_fortran_allocatable = true, name = ""}
+ // CHECK: %[[MAP0:.*]] = omp.map_info var_ptr(%[[ARG0]] : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {is_fortran_allocatable, name = ""}
+ %mapv1 = omp.map_info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {is_fortran_allocatable, name = ""}
// CHECK: %[[MAP1:.*]] = omp.map_info var_ptr(%[[ARG1]] : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(to) capture(ByCopy) -> !llvm.ptr {name = ""}
- %mapv2 = omp.map_info var_ptr(%arg1 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(to) capture(ByCopy) -> !llvm.ptr {is_fortran_allocatable = false, name = ""}
+ %mapv2 = omp.map_info var_ptr(%arg1 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(to) capture(ByCopy) -> !llvm.ptr {name = ""}
// CHECK: omp.target map_entries(%[[MAP0]] -> {{.*}}, %[[MAP1]] -> {{.*}} : !llvm.ptr, !llvm.ptr)
omp.target map_entries(%mapv1 -> %arg2, %mapv2 -> %arg3 : !llvm.ptr, !llvm.ptr) {
^bb0(%arg2: !llvm.ptr, %arg3 : !llvm.ptr):
diff --git a/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir b/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir
index 09f1f22a7209e1a..d3329b28ab6cfa3 100644
--- a/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir
@@ -99,7 +99,7 @@ module attributes {omp.is_target_device = false} {
%76 = llvm.load %75 : !llvm.ptr -> i64
%77 = llvm.sub %74, %10 : i64
%78 = omp.bounds lower_bound(%11 : i64) upper_bound(%77 : i64) stride(%76 : i64) start_idx(%72 : i64) {stride_in_bytes = true}
- %79 = omp.map_info var_ptr(%12 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) bounds(%78) -> !llvm.ptr {is_fortran_allocatable = true, name = "full_arr"}
+ %79 = omp.map_info var_ptr(%12 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) bounds(%78) -> !llvm.ptr {is_fortran_allocatable, name = "full_arr"}
%80 = llvm.load %34 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
llvm.store %80, %1 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, !llvm.ptr
%81 = llvm.getelementptr %1[0, 7, %11, 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
@@ -110,8 +110,8 @@ module attributes {omp.is_target_device = false} {
%86 = llvm.sub %9, %82 : i64
%87 = llvm.sub %8, %82 : i64
%88 = omp.bounds lower_bound(%86 : i64) upper_bound(%87 : i64) stride(%85 : i64) start_idx(%82 : i64) {stride_in_bytes = true}
- %89 = omp.map_info var_ptr(%34 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) bounds(%88) -> !llvm.ptr {is_fortran_allocatable = true, name = "sect_arr(2:5)"}
- %90 = omp.map_info var_ptr(%14 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {is_fortran_allocatable = true, name = "scalar"}
+ %89 = omp.map_info var_ptr(%34 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) bounds(%88) -> !llvm.ptr {is_fortran_allocatable, name = "sect_arr(2:5)"}
+ %90 = omp.map_info var_ptr(%14 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {is_fortran_allocatable, name = "scalar"}
omp.target map_entries(%79 -> %arg0, %89 -> %arg1, %90 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr):
%91 = llvm.mlir.constant(1 : i32) : i32
More information about the flang-commits
mailing list