[flang-commits] [flang] [flang][cuda] Set the allocator of derived type component after allocation (PR #152379)

Valentin Clement バレンタイン クレメン via flang-commits flang-commits at lists.llvm.org
Wed Aug 6 13:56:09 PDT 2025


https://github.com/clementval created https://github.com/llvm/llvm-project/pull/152379

- Move the allocator index set up after the allocate statement otherwise the derived type descriptor is not allocated.
- Support array of derived-type with device component

>From fed56a3145351593c84af7f558926356c0b54331 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Tue, 5 Aug 2025 14:46:11 -0700
Subject: [PATCH] [flang][cuda] Set the allocator of derived type component
 after allocation

---
 flang/include/flang/Lower/{Cuda.h => CUDA.h} |  12 +-
 flang/lib/Lower/Allocatable.cpp              |  14 +-
 flang/lib/Lower/Bridge.cpp                   |   2 +-
 flang/lib/Lower/CMakeLists.txt               |   1 +
 flang/lib/Lower/CUDA.cpp                     | 148 +++++++++++++++++++
 flang/lib/Lower/ConvertVariable.cpp          |  75 ++--------
 flang/test/Lower/CUDA/cuda-set-allocator.cuf |  42 ++++--
 7 files changed, 205 insertions(+), 89 deletions(-)
 rename flang/include/flang/Lower/{Cuda.h => CUDA.h} (74%)
 create mode 100644 flang/lib/Lower/CUDA.cpp

diff --git a/flang/include/flang/Lower/Cuda.h b/flang/include/flang/Lower/CUDA.h
similarity index 74%
rename from flang/include/flang/Lower/Cuda.h
rename to flang/include/flang/Lower/CUDA.h
index b6f849e3d63f0..6aad06e150ad1 100644
--- a/flang/include/flang/Lower/Cuda.h
+++ b/flang/include/flang/Lower/CUDA.h
@@ -1,4 +1,4 @@
-//===-- Lower/Cuda.h -- Cuda Fortran utilities ------------------*- C++ -*-===//
+//===-- Lower/CUDA.h -- CUDA Fortran utilities ------------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -15,6 +15,7 @@
 
 #include "flang/Optimizer/Builder/FIRBuilder.h"
 #include "flang/Optimizer/Dialect/CUF/CUFOps.h"
+#include "flang/Runtime/allocator-registry-consts.h"
 #include "flang/Semantics/tools.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/OpenACC/OpenACC.h"
@@ -37,6 +38,15 @@ static inline unsigned getAllocatorIdx(const Fortran::semantics::Symbol &sym) {
   return kDefaultAllocator;
 }
 
+void initializeDeviceComponentAllocator(
+    Fortran::lower::AbstractConverter &converter,
+    const Fortran::semantics::Symbol &sym, const fir::MutableBoxValue &box);
+
+mlir::Type gatherDeviceComponentCoordinatesAndType(
+    fir::FirOpBuilder &builder, mlir::Location loc,
+    const Fortran::semantics::Symbol &sym, fir::RecordType recTy,
+    llvm::SmallVector<mlir::Value> &coordinates);
+
 } // end namespace Fortran::lower
 
 #endif // FORTRAN_LOWER_CUDA_H
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index 219f9205f45d5..ce9d8944387e1 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -13,9 +13,9 @@
 #include "flang/Lower/Allocatable.h"
 #include "flang/Evaluate/tools.h"
 #include "flang/Lower/AbstractConverter.h"
+#include "flang/Lower/CUDA.h"
 #include "flang/Lower/ConvertType.h"
 #include "flang/Lower/ConvertVariable.h"
-#include "flang/Lower/Cuda.h"
 #include "flang/Lower/IterationSpace.h"
 #include "flang/Lower/Mangler.h"
 #include "flang/Lower/OpenACC.h"
@@ -445,10 +445,14 @@ class AllocateStmtHelper {
                                        /*mustBeHeap=*/true);
   }
 
-  void postAllocationAction(const Allocation &alloc) {
+  void postAllocationAction(const Allocation &alloc,
+                            const fir::MutableBoxValue &box) {
     if (alloc.getSymbol().test(Fortran::semantics::Symbol::Flag::AccDeclare))
       Fortran::lower::attachDeclarePostAllocAction(converter, builder,
                                                    alloc.getSymbol());
+    if (Fortran::semantics::HasCUDAComponent(alloc.getSymbol()))
+      Fortran::lower::initializeDeviceComponentAllocator(
+          converter, alloc.getSymbol(), box);
   }
 
   void setPinnedToFalse() {
@@ -481,7 +485,7 @@ class AllocateStmtHelper {
       // Pointers must use PointerAllocate so that their deallocations
       // can be validated.
       genInlinedAllocation(alloc, box);
-      postAllocationAction(alloc);
+      postAllocationAction(alloc, box);
       setPinnedToFalse();
       return;
     }
@@ -504,7 +508,7 @@ class AllocateStmtHelper {
           genCudaAllocate(builder, loc, box, errorManager, alloc.getSymbol());
     }
     fir::factory::syncMutableBoxFromIRBox(builder, loc, box);
-    postAllocationAction(alloc);
+    postAllocationAction(alloc, box);
     errorManager.assignStat(builder, loc, stat);
   }
 
@@ -647,7 +651,7 @@ class AllocateStmtHelper {
       setPinnedToFalse();
     }
     fir::factory::syncMutableBoxFromIRBox(builder, loc, box);
-    postAllocationAction(alloc);
+    postAllocationAction(alloc, box);
     errorManager.assignStat(builder, loc, stat);
   }
 
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 6b7efe6b57db3..1bad46f80612b 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -13,6 +13,7 @@
 #include "flang/Lower/Bridge.h"
 
 #include "flang/Lower/Allocatable.h"
+#include "flang/Lower/CUDA.h"
 #include "flang/Lower/CallInterface.h"
 #include "flang/Lower/Coarray.h"
 #include "flang/Lower/ConvertCall.h"
@@ -20,7 +21,6 @@
 #include "flang/Lower/ConvertExprToHLFIR.h"
 #include "flang/Lower/ConvertType.h"
 #include "flang/Lower/ConvertVariable.h"
-#include "flang/Lower/Cuda.h"
 #include "flang/Lower/DirectivesCommon.h"
 #include "flang/Lower/HostAssociations.h"
 #include "flang/Lower/IO.h"
diff --git a/flang/lib/Lower/CMakeLists.txt b/flang/lib/Lower/CMakeLists.txt
index 8e20abf0e9f2d..1d1c7ddda8e9b 100644
--- a/flang/lib/Lower/CMakeLists.txt
+++ b/flang/lib/Lower/CMakeLists.txt
@@ -15,6 +15,7 @@ add_flang_library(FortranLower
   ConvertProcedureDesignator.cpp
   ConvertType.cpp
   ConvertVariable.cpp
+  CUDA.cpp
   CustomIntrinsicCall.cpp
   HlfirIntrinsics.cpp
   HostAssociations.cpp
diff --git a/flang/lib/Lower/CUDA.cpp b/flang/lib/Lower/CUDA.cpp
new file mode 100644
index 0000000000000..60b33bb5680c2
--- /dev/null
+++ b/flang/lib/Lower/CUDA.cpp
@@ -0,0 +1,148 @@
+//===-- CUDA.cpp -- CUDA Fortran specific lowering ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Lower/CUDA.h"
+
+#define DEBUG_TYPE "flang-lower-cuda"
+
+void Fortran::lower::initializeDeviceComponentAllocator(
+    Fortran::lower::AbstractConverter &converter,
+    const Fortran::semantics::Symbol &sym, const fir::MutableBoxValue &box) {
+  if (const auto *details{
+          sym.GetUltimate()
+              .detailsIf<Fortran::semantics::ObjectEntityDetails>()}) {
+    const Fortran::semantics::DeclTypeSpec *type{details->type()};
+    const Fortran::semantics::DerivedTypeSpec *derived{type ? type->AsDerived()
+                                                            : nullptr};
+    if (derived) {
+      if (!FindCUDADeviceAllocatableUltimateComponent(*derived))
+        return; // No device components.
+
+      fir::FirOpBuilder &builder = converter.getFirOpBuilder();
+      mlir::Location loc = converter.getCurrentLocation();
+
+      mlir::Type baseTy = fir::unwrapRefType(box.getAddr().getType());
+
+      // Only pointer and allocatable needs post allocation initialization
+      // of components descriptors.
+      if (!fir::isAllocatableType(baseTy) && !fir::isPointerType(baseTy))
+        return;
+
+      // Extract the derived type.
+      mlir::Type ty = fir::getDerivedType(baseTy);
+      auto recTy = mlir::dyn_cast<fir::RecordType>(ty);
+      assert(recTy && "expected fir::RecordType");
+
+      if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(baseTy))
+        baseTy = boxTy.getEleTy();
+      baseTy = fir::unwrapRefType(baseTy);
+
+      Fortran::semantics::UltimateComponentIterator components{*derived};
+      mlir::Value loadedBox = fir::LoadOp::create(builder, loc, box.getAddr());
+      mlir::Value addr;
+      if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(baseTy)) {
+        mlir::Type idxTy = builder.getIndexType();
+        mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);
+        mlir::Value zero = builder.createIntegerConstant(loc, idxTy, 0);
+        llvm::SmallVector<fir::DoLoopOp> loops;
+        llvm::SmallVector<mlir::Value> indices;
+        llvm::SmallVector<mlir::Value> extents;
+        for (unsigned i = 0; i < seqTy.getDimension(); ++i) {
+          mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i);
+          auto dimInfo = fir::BoxDimsOp::create(builder, loc, idxTy, idxTy,
+                                                idxTy, loadedBox, dim);
+          mlir::Value lbub = mlir::arith::AddIOp::create(
+              builder, loc, dimInfo.getResult(0), dimInfo.getResult(1));
+          mlir::Value ext =
+              mlir::arith::SubIOp::create(builder, loc, lbub, one);
+          mlir::Value cmp = mlir::arith::CmpIOp::create(
+              builder, loc, mlir::arith::CmpIPredicate::sgt, ext, zero);
+          ext = mlir::arith::SelectOp::create(builder, loc, cmp, ext, zero);
+          extents.push_back(ext);
+
+          auto loop = fir::DoLoopOp::create(
+              builder, loc, dimInfo.getResult(0), dimInfo.getResult(1),
+              dimInfo.getResult(2), /*isUnordered=*/true,
+              /*finalCount=*/false, mlir::ValueRange{});
+          loops.push_back(loop);
+          indices.push_back(loop.getInductionVar());
+          builder.setInsertionPointToStart(loop.getBody());
+        }
+        mlir::Value boxAddr = fir::BoxAddrOp::create(builder, loc, loadedBox);
+        auto shape = fir::ShapeOp::create(builder, loc, extents);
+        addr = fir::ArrayCoorOp::create(
+            builder, loc, fir::ReferenceType::get(recTy), boxAddr, shape,
+            /*slice=*/mlir::Value{}, indices, /*typeparms=*/mlir::ValueRange{});
+      } else {
+        addr = fir::BoxAddrOp::create(builder, loc, loadedBox);
+      }
+      for (const auto &compSym : components) {
+        if (Fortran::semantics::IsDeviceAllocatable(compSym)) {
+          llvm::SmallVector<mlir::Value> coord;
+          mlir::Type fieldTy = gatherDeviceComponentCoordinatesAndType(
+              builder, loc, compSym, recTy, coord);
+          assert(coord.size() == 1 && "expect one coordinate");
+          mlir::Value comp = fir::CoordinateOp::create(
+              builder, loc, builder.getRefType(fieldTy), addr, coord[0]);
+          cuf::DataAttributeAttr dataAttr =
+              Fortran::lower::translateSymbolCUFDataAttribute(
+                  builder.getContext(), compSym);
+          cuf::SetAllocatorIndexOp::create(builder, loc, comp, dataAttr);
+        }
+      }
+    }
+  }
+}
+
+mlir::Type Fortran::lower::gatherDeviceComponentCoordinatesAndType(
+    fir::FirOpBuilder &builder, mlir::Location loc,
+    const Fortran::semantics::Symbol &sym, fir::RecordType recTy,
+    llvm::SmallVector<mlir::Value> &coordinates) {
+  unsigned fieldIdx = recTy.getFieldIndex(sym.name().ToString());
+  mlir::Type fieldTy;
+  if (fieldIdx != std::numeric_limits<unsigned>::max()) {
+    // Field found in the base record type.
+    auto fieldName = recTy.getTypeList()[fieldIdx].first;
+    fieldTy = recTy.getTypeList()[fieldIdx].second;
+    mlir::Value fieldIndex = fir::FieldIndexOp::create(
+        builder, loc, fir::FieldType::get(fieldTy.getContext()), fieldName,
+        recTy,
+        /*typeParams=*/mlir::ValueRange{});
+    coordinates.push_back(fieldIndex);
+  } else {
+    // Field not found in base record type, search in potential
+    // record type components.
+    for (auto component : recTy.getTypeList()) {
+      if (auto childRecTy = mlir::dyn_cast<fir::RecordType>(component.second)) {
+        fieldIdx = childRecTy.getFieldIndex(sym.name().ToString());
+        if (fieldIdx != std::numeric_limits<unsigned>::max()) {
+          mlir::Value parentFieldIndex = fir::FieldIndexOp::create(
+              builder, loc, fir::FieldType::get(childRecTy.getContext()),
+              component.first, recTy,
+              /*typeParams=*/mlir::ValueRange{});
+          coordinates.push_back(parentFieldIndex);
+          auto fieldName = childRecTy.getTypeList()[fieldIdx].first;
+          fieldTy = childRecTy.getTypeList()[fieldIdx].second;
+          mlir::Value childFieldIndex = fir::FieldIndexOp::create(
+              builder, loc, fir::FieldType::get(fieldTy.getContext()),
+              fieldName, childRecTy,
+              /*typeParams=*/mlir::ValueRange{});
+          coordinates.push_back(childFieldIndex);
+          break;
+        }
+      }
+    }
+  }
+  if (coordinates.empty())
+    TODO(loc, "device resident component in complex derived-type hierarchy");
+  return fieldTy;
+}
diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp
index a4a8a697e02ae..6a64320965095 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -14,12 +14,12 @@
 #include "flang/Lower/AbstractConverter.h"
 #include "flang/Lower/Allocatable.h"
 #include "flang/Lower/BoxAnalyzer.h"
+#include "flang/Lower/CUDA.h"
 #include "flang/Lower/CallInterface.h"
 #include "flang/Lower/ConvertConstant.h"
 #include "flang/Lower/ConvertExpr.h"
 #include "flang/Lower/ConvertExprToHLFIR.h"
 #include "flang/Lower/ConvertProcedureDesignator.h"
-#include "flang/Lower/Cuda.h"
 #include "flang/Lower/Mangler.h"
 #include "flang/Lower/PFTBuilder.h"
 #include "flang/Lower/StatementContext.h"
@@ -814,81 +814,24 @@ initializeDeviceComponentAllocator(Fortran::lower::AbstractConverter &converter,
         baseTy = boxTy.getEleTy();
       baseTy = fir::unwrapRefType(baseTy);
 
-      if (mlir::isa<fir::SequenceType>(baseTy) &&
-          (fir::isAllocatableType(fir::getBase(exv).getType()) ||
-           fir::isPointerType(fir::getBase(exv).getType())))
+      if (fir::isAllocatableType(fir::getBase(exv).getType()) ||
+          fir::isPointerType(fir::getBase(exv).getType()))
         return; // Allocator index need to be set after allocation.
 
       auto recTy =
           mlir::dyn_cast<fir::RecordType>(fir::unwrapSequenceType(baseTy));
       assert(recTy && "expected fir::RecordType");
 
-      llvm::SmallVector<mlir::Value> coordinates;
       Fortran::semantics::UltimateComponentIterator components{*derived};
       for (const auto &sym : components) {
         if (Fortran::semantics::IsDeviceAllocatable(sym)) {
-          unsigned fieldIdx = recTy.getFieldIndex(sym.name().ToString());
-          mlir::Type fieldTy;
-          llvm::SmallVector<mlir::Value> coordinates;
-
-          if (fieldIdx != std::numeric_limits<unsigned>::max()) {
-            // Field found in the base record type.
-            auto fieldName = recTy.getTypeList()[fieldIdx].first;
-            fieldTy = recTy.getTypeList()[fieldIdx].second;
-            mlir::Value fieldIndex = fir::FieldIndexOp::create(
-                builder, loc, fir::FieldType::get(fieldTy.getContext()),
-                fieldName, recTy,
-                /*typeParams=*/mlir::ValueRange{});
-            coordinates.push_back(fieldIndex);
-          } else {
-            // Field not found in base record type, search in potential
-            // record type components.
-            for (auto component : recTy.getTypeList()) {
-              if (auto childRecTy =
-                      mlir::dyn_cast<fir::RecordType>(component.second)) {
-                fieldIdx = childRecTy.getFieldIndex(sym.name().ToString());
-                if (fieldIdx != std::numeric_limits<unsigned>::max()) {
-                  mlir::Value parentFieldIndex = fir::FieldIndexOp::create(
-                      builder, loc,
-                      fir::FieldType::get(childRecTy.getContext()),
-                      component.first, recTy,
-                      /*typeParams=*/mlir::ValueRange{});
-                  coordinates.push_back(parentFieldIndex);
-                  auto fieldName = childRecTy.getTypeList()[fieldIdx].first;
-                  fieldTy = childRecTy.getTypeList()[fieldIdx].second;
-                  mlir::Value childFieldIndex = fir::FieldIndexOp::create(
-                      builder, loc, fir::FieldType::get(fieldTy.getContext()),
-                      fieldName, childRecTy,
-                      /*typeParams=*/mlir::ValueRange{});
-                  coordinates.push_back(childFieldIndex);
-                  break;
-                }
-              }
-            }
-          }
-
-          if (coordinates.empty())
-            TODO(loc, "device resident component in complex derived-type "
-                      "hierarchy");
-
+          llvm::SmallVector<mlir::Value> coord;
+          mlir::Type fieldTy =
+              Fortran::lower::gatherDeviceComponentCoordinatesAndType(
+                  builder, loc, sym, recTy, coord);
           mlir::Value base = fir::getBase(exv);
-          mlir::Value comp;
-          if (mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(base.getType()))) {
-            mlir::Value box = fir::LoadOp::create(builder, loc, base);
-            mlir::Value addr = fir::BoxAddrOp::create(builder, loc, box);
-            llvm::SmallVector<mlir::Value> lenParams;
-            assert(coordinates.size() == 1 && "expect one coordinate");
-            auto field = mlir::dyn_cast<fir::FieldIndexOp>(
-                coordinates[0].getDefiningOp());
-            comp = hlfir::DesignateOp::create(
-                builder, loc, builder.getRefType(fieldTy), addr,
-                /*component=*/field.getFieldName(),
-                /*componentShape=*/mlir::Value{},
-                hlfir::DesignateOp::Subscripts{});
-          } else {
-            comp = fir::CoordinateOp::create(
-                builder, loc, builder.getRefType(fieldTy), base, coordinates);
-          }
+          mlir::Value comp = fir::CoordinateOp::create(
+              builder, loc, builder.getRefType(fieldTy), base, coord);
           cuf::DataAttributeAttr dataAttr =
               Fortran::lower::translateSymbolCUFDataAttribute(
                   builder.getContext(), sym);
diff --git a/flang/test/Lower/CUDA/cuda-set-allocator.cuf b/flang/test/Lower/CUDA/cuda-set-allocator.cuf
index e3bb181f65398..d783f340fe9a4 100644
--- a/flang/test/Lower/CUDA/cuda-set-allocator.cuf
+++ b/flang/test/Lower/CUDA/cuda-set-allocator.cuf
@@ -23,34 +23,44 @@ contains
 
   subroutine sub2()
     type(ty_device), pointer :: d1
+    allocate(d1)
   end subroutine
 
 ! CHECK-LABEL: func.func @_QMm1Psub2()
 ! CHECK: %[[ALLOC:.*]] = cuf.alloc !fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>> {bindc_name = "d1", data_attr = #cuf.cuda<managed>, uniq_name = "_QMm1Fsub2Ed1"} -> !fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
 ! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]] {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QMm1Fsub2Ed1"} : (!fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>) -> (!fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>)
-! CHECK: %[[LOAD1:.*]] = fir.load %[[DECL]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
-! CHECK: %[[ADDR1:.*]] = fir.box_addr %[[LOAD1]] : (!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>) -> !fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>
-! CHECK: %[[DESIGNATE1:.*]] = hlfir.designate %[[ADDR1]]{"x"} : (!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK: cuf.set_allocator_idx %[[DESIGNATE1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
-! CHECK: %[[LOAD2:.*]] = fir.load %[[DECL]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
-! CHECK: %[[ADDR2:.*]] = fir.box_addr %[[LOAD2]] : (!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>) -> !fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>
-! CHECK: %[[DESIGNATE2:.*]] = hlfir.designate %[[ADDR2]]{"z"} : (!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK: cuf.set_allocator_idx %[[DESIGNATE2]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
+! CHECK: cuf.allocate
+! CHECK: %[[LOAD:.*]] = fir.load %[[DECL]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
+! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>) -> !fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>
+! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ADDR]], x : (!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: cuf.set_allocator_idx %[[COORD1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
+! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ADDR]], z : (!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: cuf.set_allocator_idx %[[COORD2]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
 
   subroutine sub3()
     type(ty_device), allocatable :: d1
+    allocate(d1)
   end subroutine
 
 ! CHECK-LABEL: func.func @_QMm1Psub3()
 ! CHECK: %[[ALLOC:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>> {bindc_name = "d1", data_attr = #cuf.cuda<managed>, uniq_name = "_QMm1Fsub3Ed1"} -> !fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
 ! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]] {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMm1Fsub3Ed1"} : (!fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>, !fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>)
-! CHECK: %[[LOAD1:.*]] = fir.load %[[DECL]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
-! CHECK: %[[ADDR1:.*]] = fir.box_addr %[[LOAD1]] : (!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>) -> !fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>
-! CHECK: %[[DESIGNATE1:.*]] = hlfir.designate %[[ADDR1]]{"x"} : (!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK: cuf.set_allocator_idx %[[DESIGNATE1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
-! CHECK: %[[LOAD2:.*]] = fir.load %[[DECL]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
-! CHECK: %[[ADDR2:.*]] = fir.box_addr %[[LOAD2]] : (!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>) -> !fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>
-! CHECK: %[[DESIGNATE2:.*]] = hlfir.designate %[[ADDR2]]{"z"} : (!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK: cuf.set_allocator_idx %[[DESIGNATE2]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
+! CHECK: cuf.allocate
+! CHECK: %[[LOAD:.*]] = fir.load %[[DECL]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
+! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>) -> !fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>
+! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ADDR]], x : (!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: cuf.set_allocator_idx %[[COORD1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
+! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ADDR]], z : (!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: cuf.set_allocator_idx %[[COORD2]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
+
+  subroutine sub4()
+    type(ty_device), allocatable :: d1(:,:)
+    allocate(d1(10, 10))
+  end subroutine
+
+! CHECK-LABEL: func.func @_QMm1Psub4()
+! CHECK: cuf.allocate
+! CHECK-COUNT-2: fir.do_loop
+! CHECK-COUNT-2: cuf.set_allocator_idx
 
 end module



More information about the flang-commits mailing list