[flang-commits] [flang] [flang][cuda] Set the allocator of derived type component after allocation (PR #152379)
Valentin Clement バレンタイン クレメン via flang-commits
flang-commits at lists.llvm.org
Wed Aug 6 13:56:09 PDT 2025
https://github.com/clementval created https://github.com/llvm/llvm-project/pull/152379
- Move the allocator index set up after the allocate statement otherwise the derived type descriptor is not allocated.
- Support array of derived-type with device component
>From fed56a3145351593c84af7f558926356c0b54331 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Tue, 5 Aug 2025 14:46:11 -0700
Subject: [PATCH] [flang][cuda] Set the allocator of derived type component
after allocation
---
flang/include/flang/Lower/{Cuda.h => CUDA.h} | 12 +-
flang/lib/Lower/Allocatable.cpp | 14 +-
flang/lib/Lower/Bridge.cpp | 2 +-
flang/lib/Lower/CMakeLists.txt | 1 +
flang/lib/Lower/CUDA.cpp | 148 +++++++++++++++++++
flang/lib/Lower/ConvertVariable.cpp | 75 ++--------
flang/test/Lower/CUDA/cuda-set-allocator.cuf | 42 ++++--
7 files changed, 205 insertions(+), 89 deletions(-)
rename flang/include/flang/Lower/{Cuda.h => CUDA.h} (74%)
create mode 100644 flang/lib/Lower/CUDA.cpp
diff --git a/flang/include/flang/Lower/Cuda.h b/flang/include/flang/Lower/CUDA.h
similarity index 74%
rename from flang/include/flang/Lower/Cuda.h
rename to flang/include/flang/Lower/CUDA.h
index b6f849e3d63f0..6aad06e150ad1 100644
--- a/flang/include/flang/Lower/Cuda.h
+++ b/flang/include/flang/Lower/CUDA.h
@@ -1,4 +1,4 @@
-//===-- Lower/Cuda.h -- Cuda Fortran utilities ------------------*- C++ -*-===//
+//===-- Lower/CUDA.h -- CUDA Fortran utilities ------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -15,6 +15,7 @@
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
+#include "flang/Runtime/allocator-registry-consts.h"
#include "flang/Semantics/tools.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/OpenACC/OpenACC.h"
@@ -37,6 +38,15 @@ static inline unsigned getAllocatorIdx(const Fortran::semantics::Symbol &sym) {
return kDefaultAllocator;
}
+void initializeDeviceComponentAllocator(
+ Fortran::lower::AbstractConverter &converter,
+ const Fortran::semantics::Symbol &sym, const fir::MutableBoxValue &box);
+
+mlir::Type gatherDeviceComponentCoordinatesAndType(
+ fir::FirOpBuilder &builder, mlir::Location loc,
+ const Fortran::semantics::Symbol &sym, fir::RecordType recTy,
+ llvm::SmallVector<mlir::Value> &coordinates);
+
} // end namespace Fortran::lower
#endif // FORTRAN_LOWER_CUDA_H
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index 219f9205f45d5..ce9d8944387e1 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -13,9 +13,9 @@
#include "flang/Lower/Allocatable.h"
#include "flang/Evaluate/tools.h"
#include "flang/Lower/AbstractConverter.h"
+#include "flang/Lower/CUDA.h"
#include "flang/Lower/ConvertType.h"
#include "flang/Lower/ConvertVariable.h"
-#include "flang/Lower/Cuda.h"
#include "flang/Lower/IterationSpace.h"
#include "flang/Lower/Mangler.h"
#include "flang/Lower/OpenACC.h"
@@ -445,10 +445,14 @@ class AllocateStmtHelper {
/*mustBeHeap=*/true);
}
- void postAllocationAction(const Allocation &alloc) {
+ void postAllocationAction(const Allocation &alloc,
+ const fir::MutableBoxValue &box) {
if (alloc.getSymbol().test(Fortran::semantics::Symbol::Flag::AccDeclare))
Fortran::lower::attachDeclarePostAllocAction(converter, builder,
alloc.getSymbol());
+ if (Fortran::semantics::HasCUDAComponent(alloc.getSymbol()))
+ Fortran::lower::initializeDeviceComponentAllocator(
+ converter, alloc.getSymbol(), box);
}
void setPinnedToFalse() {
@@ -481,7 +485,7 @@ class AllocateStmtHelper {
// Pointers must use PointerAllocate so that their deallocations
// can be validated.
genInlinedAllocation(alloc, box);
- postAllocationAction(alloc);
+ postAllocationAction(alloc, box);
setPinnedToFalse();
return;
}
@@ -504,7 +508,7 @@ class AllocateStmtHelper {
genCudaAllocate(builder, loc, box, errorManager, alloc.getSymbol());
}
fir::factory::syncMutableBoxFromIRBox(builder, loc, box);
- postAllocationAction(alloc);
+ postAllocationAction(alloc, box);
errorManager.assignStat(builder, loc, stat);
}
@@ -647,7 +651,7 @@ class AllocateStmtHelper {
setPinnedToFalse();
}
fir::factory::syncMutableBoxFromIRBox(builder, loc, box);
- postAllocationAction(alloc);
+ postAllocationAction(alloc, box);
errorManager.assignStat(builder, loc, stat);
}
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 6b7efe6b57db3..1bad46f80612b 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -13,6 +13,7 @@
#include "flang/Lower/Bridge.h"
#include "flang/Lower/Allocatable.h"
+#include "flang/Lower/CUDA.h"
#include "flang/Lower/CallInterface.h"
#include "flang/Lower/Coarray.h"
#include "flang/Lower/ConvertCall.h"
@@ -20,7 +21,6 @@
#include "flang/Lower/ConvertExprToHLFIR.h"
#include "flang/Lower/ConvertType.h"
#include "flang/Lower/ConvertVariable.h"
-#include "flang/Lower/Cuda.h"
#include "flang/Lower/DirectivesCommon.h"
#include "flang/Lower/HostAssociations.h"
#include "flang/Lower/IO.h"
diff --git a/flang/lib/Lower/CMakeLists.txt b/flang/lib/Lower/CMakeLists.txt
index 8e20abf0e9f2d..1d1c7ddda8e9b 100644
--- a/flang/lib/Lower/CMakeLists.txt
+++ b/flang/lib/Lower/CMakeLists.txt
@@ -15,6 +15,7 @@ add_flang_library(FortranLower
ConvertProcedureDesignator.cpp
ConvertType.cpp
ConvertVariable.cpp
+ CUDA.cpp
CustomIntrinsicCall.cpp
HlfirIntrinsics.cpp
HostAssociations.cpp
diff --git a/flang/lib/Lower/CUDA.cpp b/flang/lib/Lower/CUDA.cpp
new file mode 100644
index 0000000000000..60b33bb5680c2
--- /dev/null
+++ b/flang/lib/Lower/CUDA.cpp
@@ -0,0 +1,148 @@
+//===-- CUDA.cpp -- CUDA Fortran specific lowering ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Lower/CUDA.h"
+
+#define DEBUG_TYPE "flang-lower-cuda"
+
+void Fortran::lower::initializeDeviceComponentAllocator(
+ Fortran::lower::AbstractConverter &converter,
+ const Fortran::semantics::Symbol &sym, const fir::MutableBoxValue &box) {
+ if (const auto *details{
+ sym.GetUltimate()
+ .detailsIf<Fortran::semantics::ObjectEntityDetails>()}) {
+ const Fortran::semantics::DeclTypeSpec *type{details->type()};
+ const Fortran::semantics::DerivedTypeSpec *derived{type ? type->AsDerived()
+ : nullptr};
+ if (derived) {
+ if (!FindCUDADeviceAllocatableUltimateComponent(*derived))
+ return; // No device components.
+
+ fir::FirOpBuilder &builder = converter.getFirOpBuilder();
+ mlir::Location loc = converter.getCurrentLocation();
+
+ mlir::Type baseTy = fir::unwrapRefType(box.getAddr().getType());
+
+ // Only pointer and allocatable needs post allocation initialization
+ // of components descriptors.
+ if (!fir::isAllocatableType(baseTy) && !fir::isPointerType(baseTy))
+ return;
+
+ // Extract the derived type.
+ mlir::Type ty = fir::getDerivedType(baseTy);
+ auto recTy = mlir::dyn_cast<fir::RecordType>(ty);
+ assert(recTy && "expected fir::RecordType");
+
+ if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(baseTy))
+ baseTy = boxTy.getEleTy();
+ baseTy = fir::unwrapRefType(baseTy);
+
+ Fortran::semantics::UltimateComponentIterator components{*derived};
+ mlir::Value loadedBox = fir::LoadOp::create(builder, loc, box.getAddr());
+ mlir::Value addr;
+ if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(baseTy)) {
+ mlir::Type idxTy = builder.getIndexType();
+ mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);
+ mlir::Value zero = builder.createIntegerConstant(loc, idxTy, 0);
+ llvm::SmallVector<fir::DoLoopOp> loops;
+ llvm::SmallVector<mlir::Value> indices;
+ llvm::SmallVector<mlir::Value> extents;
+ for (unsigned i = 0; i < seqTy.getDimension(); ++i) {
+ mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i);
+ auto dimInfo = fir::BoxDimsOp::create(builder, loc, idxTy, idxTy,
+ idxTy, loadedBox, dim);
+ mlir::Value lbub = mlir::arith::AddIOp::create(
+ builder, loc, dimInfo.getResult(0), dimInfo.getResult(1));
+ mlir::Value ext =
+ mlir::arith::SubIOp::create(builder, loc, lbub, one);
+ mlir::Value cmp = mlir::arith::CmpIOp::create(
+ builder, loc, mlir::arith::CmpIPredicate::sgt, ext, zero);
+ ext = mlir::arith::SelectOp::create(builder, loc, cmp, ext, zero);
+ extents.push_back(ext);
+
+ auto loop = fir::DoLoopOp::create(
+ builder, loc, dimInfo.getResult(0), dimInfo.getResult(1),
+ dimInfo.getResult(2), /*isUnordered=*/true,
+ /*finalCount=*/false, mlir::ValueRange{});
+ loops.push_back(loop);
+ indices.push_back(loop.getInductionVar());
+ builder.setInsertionPointToStart(loop.getBody());
+ }
+ mlir::Value boxAddr = fir::BoxAddrOp::create(builder, loc, loadedBox);
+ auto shape = fir::ShapeOp::create(builder, loc, extents);
+ addr = fir::ArrayCoorOp::create(
+ builder, loc, fir::ReferenceType::get(recTy), boxAddr, shape,
+ /*slice=*/mlir::Value{}, indices, /*typeparms=*/mlir::ValueRange{});
+ } else {
+ addr = fir::BoxAddrOp::create(builder, loc, loadedBox);
+ }
+ for (const auto &compSym : components) {
+ if (Fortran::semantics::IsDeviceAllocatable(compSym)) {
+ llvm::SmallVector<mlir::Value> coord;
+ mlir::Type fieldTy = gatherDeviceComponentCoordinatesAndType(
+ builder, loc, compSym, recTy, coord);
+ assert(coord.size() == 1 && "expect one coordinate");
+ mlir::Value comp = fir::CoordinateOp::create(
+ builder, loc, builder.getRefType(fieldTy), addr, coord[0]);
+ cuf::DataAttributeAttr dataAttr =
+ Fortran::lower::translateSymbolCUFDataAttribute(
+ builder.getContext(), compSym);
+ cuf::SetAllocatorIndexOp::create(builder, loc, comp, dataAttr);
+ }
+ }
+ }
+ }
+}
+
+mlir::Type Fortran::lower::gatherDeviceComponentCoordinatesAndType(
+ fir::FirOpBuilder &builder, mlir::Location loc,
+ const Fortran::semantics::Symbol &sym, fir::RecordType recTy,
+ llvm::SmallVector<mlir::Value> &coordinates) {
+ unsigned fieldIdx = recTy.getFieldIndex(sym.name().ToString());
+ mlir::Type fieldTy;
+ if (fieldIdx != std::numeric_limits<unsigned>::max()) {
+ // Field found in the base record type.
+ auto fieldName = recTy.getTypeList()[fieldIdx].first;
+ fieldTy = recTy.getTypeList()[fieldIdx].second;
+ mlir::Value fieldIndex = fir::FieldIndexOp::create(
+ builder, loc, fir::FieldType::get(fieldTy.getContext()), fieldName,
+ recTy,
+ /*typeParams=*/mlir::ValueRange{});
+ coordinates.push_back(fieldIndex);
+ } else {
+ // Field not found in base record type, search in potential
+ // record type components.
+ for (auto component : recTy.getTypeList()) {
+ if (auto childRecTy = mlir::dyn_cast<fir::RecordType>(component.second)) {
+ fieldIdx = childRecTy.getFieldIndex(sym.name().ToString());
+ if (fieldIdx != std::numeric_limits<unsigned>::max()) {
+ mlir::Value parentFieldIndex = fir::FieldIndexOp::create(
+ builder, loc, fir::FieldType::get(childRecTy.getContext()),
+ component.first, recTy,
+ /*typeParams=*/mlir::ValueRange{});
+ coordinates.push_back(parentFieldIndex);
+ auto fieldName = childRecTy.getTypeList()[fieldIdx].first;
+ fieldTy = childRecTy.getTypeList()[fieldIdx].second;
+ mlir::Value childFieldIndex = fir::FieldIndexOp::create(
+ builder, loc, fir::FieldType::get(fieldTy.getContext()),
+ fieldName, childRecTy,
+ /*typeParams=*/mlir::ValueRange{});
+ coordinates.push_back(childFieldIndex);
+ break;
+ }
+ }
+ }
+ }
+ if (coordinates.empty())
+ TODO(loc, "device resident component in complex derived-type hierarchy");
+ return fieldTy;
+}
diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp
index a4a8a697e02ae..6a64320965095 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -14,12 +14,12 @@
#include "flang/Lower/AbstractConverter.h"
#include "flang/Lower/Allocatable.h"
#include "flang/Lower/BoxAnalyzer.h"
+#include "flang/Lower/CUDA.h"
#include "flang/Lower/CallInterface.h"
#include "flang/Lower/ConvertConstant.h"
#include "flang/Lower/ConvertExpr.h"
#include "flang/Lower/ConvertExprToHLFIR.h"
#include "flang/Lower/ConvertProcedureDesignator.h"
-#include "flang/Lower/Cuda.h"
#include "flang/Lower/Mangler.h"
#include "flang/Lower/PFTBuilder.h"
#include "flang/Lower/StatementContext.h"
@@ -814,81 +814,24 @@ initializeDeviceComponentAllocator(Fortran::lower::AbstractConverter &converter,
baseTy = boxTy.getEleTy();
baseTy = fir::unwrapRefType(baseTy);
- if (mlir::isa<fir::SequenceType>(baseTy) &&
- (fir::isAllocatableType(fir::getBase(exv).getType()) ||
- fir::isPointerType(fir::getBase(exv).getType())))
+ if (fir::isAllocatableType(fir::getBase(exv).getType()) ||
+ fir::isPointerType(fir::getBase(exv).getType()))
return; // Allocator index need to be set after allocation.
auto recTy =
mlir::dyn_cast<fir::RecordType>(fir::unwrapSequenceType(baseTy));
assert(recTy && "expected fir::RecordType");
- llvm::SmallVector<mlir::Value> coordinates;
Fortran::semantics::UltimateComponentIterator components{*derived};
for (const auto &sym : components) {
if (Fortran::semantics::IsDeviceAllocatable(sym)) {
- unsigned fieldIdx = recTy.getFieldIndex(sym.name().ToString());
- mlir::Type fieldTy;
- llvm::SmallVector<mlir::Value> coordinates;
-
- if (fieldIdx != std::numeric_limits<unsigned>::max()) {
- // Field found in the base record type.
- auto fieldName = recTy.getTypeList()[fieldIdx].first;
- fieldTy = recTy.getTypeList()[fieldIdx].second;
- mlir::Value fieldIndex = fir::FieldIndexOp::create(
- builder, loc, fir::FieldType::get(fieldTy.getContext()),
- fieldName, recTy,
- /*typeParams=*/mlir::ValueRange{});
- coordinates.push_back(fieldIndex);
- } else {
- // Field not found in base record type, search in potential
- // record type components.
- for (auto component : recTy.getTypeList()) {
- if (auto childRecTy =
- mlir::dyn_cast<fir::RecordType>(component.second)) {
- fieldIdx = childRecTy.getFieldIndex(sym.name().ToString());
- if (fieldIdx != std::numeric_limits<unsigned>::max()) {
- mlir::Value parentFieldIndex = fir::FieldIndexOp::create(
- builder, loc,
- fir::FieldType::get(childRecTy.getContext()),
- component.first, recTy,
- /*typeParams=*/mlir::ValueRange{});
- coordinates.push_back(parentFieldIndex);
- auto fieldName = childRecTy.getTypeList()[fieldIdx].first;
- fieldTy = childRecTy.getTypeList()[fieldIdx].second;
- mlir::Value childFieldIndex = fir::FieldIndexOp::create(
- builder, loc, fir::FieldType::get(fieldTy.getContext()),
- fieldName, childRecTy,
- /*typeParams=*/mlir::ValueRange{});
- coordinates.push_back(childFieldIndex);
- break;
- }
- }
- }
- }
-
- if (coordinates.empty())
- TODO(loc, "device resident component in complex derived-type "
- "hierarchy");
-
+ llvm::SmallVector<mlir::Value> coord;
+ mlir::Type fieldTy =
+ Fortran::lower::gatherDeviceComponentCoordinatesAndType(
+ builder, loc, sym, recTy, coord);
mlir::Value base = fir::getBase(exv);
- mlir::Value comp;
- if (mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(base.getType()))) {
- mlir::Value box = fir::LoadOp::create(builder, loc, base);
- mlir::Value addr = fir::BoxAddrOp::create(builder, loc, box);
- llvm::SmallVector<mlir::Value> lenParams;
- assert(coordinates.size() == 1 && "expect one coordinate");
- auto field = mlir::dyn_cast<fir::FieldIndexOp>(
- coordinates[0].getDefiningOp());
- comp = hlfir::DesignateOp::create(
- builder, loc, builder.getRefType(fieldTy), addr,
- /*component=*/field.getFieldName(),
- /*componentShape=*/mlir::Value{},
- hlfir::DesignateOp::Subscripts{});
- } else {
- comp = fir::CoordinateOp::create(
- builder, loc, builder.getRefType(fieldTy), base, coordinates);
- }
+ mlir::Value comp = fir::CoordinateOp::create(
+ builder, loc, builder.getRefType(fieldTy), base, coord);
cuf::DataAttributeAttr dataAttr =
Fortran::lower::translateSymbolCUFDataAttribute(
builder.getContext(), sym);
diff --git a/flang/test/Lower/CUDA/cuda-set-allocator.cuf b/flang/test/Lower/CUDA/cuda-set-allocator.cuf
index e3bb181f65398..d783f340fe9a4 100644
--- a/flang/test/Lower/CUDA/cuda-set-allocator.cuf
+++ b/flang/test/Lower/CUDA/cuda-set-allocator.cuf
@@ -23,34 +23,44 @@ contains
subroutine sub2()
type(ty_device), pointer :: d1
+ allocate(d1)
end subroutine
! CHECK-LABEL: func.func @_QMm1Psub2()
! CHECK: %[[ALLOC:.*]] = cuf.alloc !fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>> {bindc_name = "d1", data_attr = #cuf.cuda<managed>, uniq_name = "_QMm1Fsub2Ed1"} -> !fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]] {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QMm1Fsub2Ed1"} : (!fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>) -> (!fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>)
-! CHECK: %[[LOAD1:.*]] = fir.load %[[DECL]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
-! CHECK: %[[ADDR1:.*]] = fir.box_addr %[[LOAD1]] : (!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>) -> !fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>
-! CHECK: %[[DESIGNATE1:.*]] = hlfir.designate %[[ADDR1]]{"x"} : (!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK: cuf.set_allocator_idx %[[DESIGNATE1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
-! CHECK: %[[LOAD2:.*]] = fir.load %[[DECL]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
-! CHECK: %[[ADDR2:.*]] = fir.box_addr %[[LOAD2]] : (!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>) -> !fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>
-! CHECK: %[[DESIGNATE2:.*]] = hlfir.designate %[[ADDR2]]{"z"} : (!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK: cuf.set_allocator_idx %[[DESIGNATE2]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
+! CHECK: cuf.allocate
+! CHECK: %[[LOAD:.*]] = fir.load %[[DECL]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
+! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>) -> !fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>
+! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ADDR]], x : (!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: cuf.set_allocator_idx %[[COORD1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
+! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ADDR]], z : (!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: cuf.set_allocator_idx %[[COORD2]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
subroutine sub3()
type(ty_device), allocatable :: d1
+ allocate(d1)
end subroutine
! CHECK-LABEL: func.func @_QMm1Psub3()
! CHECK: %[[ALLOC:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>> {bindc_name = "d1", data_attr = #cuf.cuda<managed>, uniq_name = "_QMm1Fsub3Ed1"} -> !fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]] {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMm1Fsub3Ed1"} : (!fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>, !fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>)
-! CHECK: %[[LOAD1:.*]] = fir.load %[[DECL]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
-! CHECK: %[[ADDR1:.*]] = fir.box_addr %[[LOAD1]] : (!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>) -> !fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>
-! CHECK: %[[DESIGNATE1:.*]] = hlfir.designate %[[ADDR1]]{"x"} : (!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK: cuf.set_allocator_idx %[[DESIGNATE1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
-! CHECK: %[[LOAD2:.*]] = fir.load %[[DECL]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
-! CHECK: %[[ADDR2:.*]] = fir.box_addr %[[LOAD2]] : (!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>) -> !fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>
-! CHECK: %[[DESIGNATE2:.*]] = hlfir.designate %[[ADDR2]]{"z"} : (!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK: cuf.set_allocator_idx %[[DESIGNATE2]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
+! CHECK: cuf.allocate
+! CHECK: %[[LOAD:.*]] = fir.load %[[DECL]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
+! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>) -> !fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>
+! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ADDR]], x : (!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: cuf.set_allocator_idx %[[COORD1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
+! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ADDR]], z : (!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: cuf.set_allocator_idx %[[COORD2]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
+
+ subroutine sub4()
+ type(ty_device), allocatable :: d1(:,:)
+ allocate(d1(10, 10))
+ end subroutine
+
+! CHECK-LABEL: func.func @_QMm1Psub4()
+! CHECK: cuf.allocate
+! CHECK-COUNT-2: fir.do_loop
+! CHECK-COUNT-2: cuf.set_allocator_idx
end module
More information about the flang-commits
mailing list