[flang-commits] [flang] [llvm] [flang][cuda] Use a reference for asyncObject (PR #138186)
Valentin Clement バレンタイン クレメン via flang-commits
flang-commits at lists.llvm.org
Thu May 1 12:25:49 PDT 2025
https://github.com/clementval created https://github.com/llvm/llvm-project/pull/138186
Switch from `int64_t` to `int64_t*` to fit with the rest of the implementation.
New tentative with some fix. The previous was reverted yesterday.
>From 09cf791ff58cc70a02a0364a8cfd7705c92aa831 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?=
=?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?=
=?UTF-8?q?=E3=83=B3=29?= <clementval at gmail.com>
Date: Wed, 30 Apr 2025 14:02:29 -0700
Subject: [PATCH] [flang][cuda] Use a reference for asyncObject
Switch from `int64_t` to `int64_t*` to fit with the rest of the
implementation.
---
.../flang-rt/runtime/allocator-registry.h | 4 +--
.../include/flang-rt/runtime/descriptor.h | 6 ++---
.../flang-rt/runtime/reduction-templates.h | 2 +-
flang-rt/lib/cuda/allocatable.cpp | 8 +++---
flang-rt/lib/cuda/allocator.cpp | 20 +++++++-------
flang-rt/lib/cuda/descriptor.cpp | 2 +-
flang-rt/lib/runtime/allocatable.cpp | 12 ++++-----
flang-rt/lib/runtime/array-constructor.cpp | 4 +--
flang-rt/lib/runtime/assign.cpp | 4 +--
flang-rt/lib/runtime/character.cpp | 20 +++++++-------
flang-rt/lib/runtime/copy.cpp | 4 +--
flang-rt/lib/runtime/derived.cpp | 6 ++---
flang-rt/lib/runtime/descriptor.cpp | 4 +--
flang-rt/lib/runtime/extrema.cpp | 4 +--
flang-rt/lib/runtime/findloc.cpp | 2 +-
flang-rt/lib/runtime/matmul-transpose.cpp | 2 +-
flang-rt/lib/runtime/matmul.cpp | 2 +-
flang-rt/lib/runtime/misc-intrinsic.cpp | 2 +-
flang-rt/lib/runtime/pointer.cpp | 2 +-
flang-rt/lib/runtime/temporary-stack.cpp | 2 +-
flang-rt/lib/runtime/tools.cpp | 2 +-
flang-rt/lib/runtime/transformational.cpp | 4 +--
flang-rt/unittests/Evaluate/reshape.cpp | 2 +-
flang-rt/unittests/Runtime/Allocatable.cpp | 4 +--
.../unittests/Runtime/CUDA/Allocatable.cpp | 12 ++++++---
.../unittests/Runtime/CUDA/AllocatorCUF.cpp | 4 +--
flang-rt/unittests/Runtime/CUDA/Memory.cpp | 4 +--
flang-rt/unittests/Runtime/CharacterTest.cpp | 2 +-
flang-rt/unittests/Runtime/CommandTest.cpp | 8 +++---
flang-rt/unittests/Runtime/TemporaryStack.cpp | 4 +--
flang-rt/unittests/Runtime/tools.h | 2 +-
.../flang/Optimizer/Dialect/CUF/CUFOps.td | 11 ++++----
.../include/flang/Runtime/CUDA/allocatable.h | 8 +++---
flang/include/flang/Runtime/CUDA/allocator.h | 8 +++---
flang/include/flang/Runtime/CUDA/pointer.h | 8 +++---
flang/include/flang/Runtime/allocatable.h | 7 ++---
flang/lib/Lower/Allocatable.cpp | 2 +-
.../Optimizer/Builder/Runtime/Allocatable.cpp | 7 +++--
flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp | 22 ++++++++--------
.../Optimizer/Transforms/CUFOpConversion.cpp | 10 +++----
flang/test/Fir/CUDA/cuda-allocate.fir | 18 ++++++-------
flang/test/Fir/cuf-invalid.fir | 5 ++--
flang/test/Fir/cuf.mlir | 7 +++--
flang/test/HLFIR/elemental-codegen.fir | 6 ++---
flang/test/Lower/CUDA/cuda-allocatable.cuf | 9 +++----
.../acc-declare-unwrap-defaultbounds.f90 | 4 +--
flang/test/Lower/OpenACC/acc-declare.f90 | 4 +--
flang/test/Lower/allocatable-polymorphic.f90 | 26 +++++++++----------
flang/test/Lower/allocatable-runtime.f90 | 4 +--
flang/test/Lower/allocate-mold.f90 | 4 +--
flang/test/Lower/polymorphic.f90 | 2 +-
flang/test/Transforms/lower-repack-arrays.fir | 8 +++---
52 files changed, 169 insertions(+), 171 deletions(-)
diff --git a/flang-rt/include/flang-rt/runtime/allocator-registry.h b/flang-rt/include/flang-rt/runtime/allocator-registry.h
index 33e8e2c7d7850..f0ba77a360736 100644
--- a/flang-rt/include/flang-rt/runtime/allocator-registry.h
+++ b/flang-rt/include/flang-rt/runtime/allocator-registry.h
@@ -19,7 +19,7 @@
namespace Fortran::runtime {
-using AllocFct = void *(*)(std::size_t, std::int64_t);
+using AllocFct = void *(*)(std::size_t, std::int64_t *);
using FreeFct = void (*)(void *);
typedef struct Allocator_t {
@@ -28,7 +28,7 @@ typedef struct Allocator_t {
} Allocator_t;
static RT_API_ATTRS void *MallocWrapper(
- std::size_t size, [[maybe_unused]] std::int64_t) {
+ std::size_t size, [[maybe_unused]] std::int64_t *) {
return std::malloc(size);
}
#ifdef RT_DEVICE_COMPILATION
diff --git a/flang-rt/include/flang-rt/runtime/descriptor.h b/flang-rt/include/flang-rt/runtime/descriptor.h
index 9907e7866e7bf..c98e6b14850cb 100644
--- a/flang-rt/include/flang-rt/runtime/descriptor.h
+++ b/flang-rt/include/flang-rt/runtime/descriptor.h
@@ -29,8 +29,8 @@
#include <cstdio>
#include <cstring>
-/// Value used for asyncId when no specific stream is specified.
-static constexpr std::int64_t kNoAsyncId = -1;
+/// Value used for asyncObject when no specific stream is specified.
+static constexpr std::int64_t *kNoAsyncObject = nullptr;
namespace Fortran::runtime {
@@ -372,7 +372,7 @@ class Descriptor {
// before calling. It (re)computes the byte strides after
// allocation. Does not allocate automatic components or
// perform default component initialization.
- RT_API_ATTRS int Allocate(std::int64_t asyncId);
+ RT_API_ATTRS int Allocate(std::int64_t *asyncObject);
RT_API_ATTRS void SetByteStrides();
// Deallocates storage; does not call FINAL subroutines or
diff --git a/flang-rt/include/flang-rt/runtime/reduction-templates.h b/flang-rt/include/flang-rt/runtime/reduction-templates.h
index 77f77a592a476..18412708b02c5 100644
--- a/flang-rt/include/flang-rt/runtime/reduction-templates.h
+++ b/flang-rt/include/flang-rt/runtime/reduction-templates.h
@@ -347,7 +347,7 @@ inline RT_API_ATTRS void DoMaxMinNorm2(Descriptor &result, const Descriptor &x,
// as the element size of the source.
result.Establish(x.type(), x.ElementBytes(), nullptr, 0, nullptr,
CFI_attribute_allocatable);
- if (int stat{result.Allocate(kNoAsyncId)}) {
+ if (int stat{result.Allocate(kNoAsyncObject)}) {
terminator.Crash(
"%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
}
diff --git a/flang-rt/lib/cuda/allocatable.cpp b/flang-rt/lib/cuda/allocatable.cpp
index 432974d18a3e3..c77819e9440d7 100644
--- a/flang-rt/lib/cuda/allocatable.cpp
+++ b/flang-rt/lib/cuda/allocatable.cpp
@@ -23,7 +23,7 @@ namespace Fortran::runtime::cuda {
extern "C" {
RT_EXT_API_GROUP_BEGIN
-int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int64_t stream,
+int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int64_t *stream,
bool *pinned, bool hasStat, const Descriptor *errMsg,
const char *sourceFile, int sourceLine) {
int stat{RTNAME(CUFAllocatableAllocate)(
@@ -41,7 +41,7 @@ int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int64_t stream,
return stat;
}
-int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t stream,
+int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t *stream,
bool *pinned, bool hasStat, const Descriptor *errMsg,
const char *sourceFile, int sourceLine) {
if (desc.HasAddendum()) {
@@ -63,7 +63,7 @@ int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t stream,
}
int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc,
- const Descriptor &source, int64_t stream, bool *pinned, bool hasStat,
+ const Descriptor &source, int64_t *stream, bool *pinned, bool hasStat,
const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
int stat{RTNAME(CUFAllocatableAllocate)(
alloc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
@@ -76,7 +76,7 @@ int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc,
}
int RTDEF(CUFAllocatableAllocateSourceSync)(Descriptor &alloc,
- const Descriptor &source, int64_t stream, bool *pinned, bool hasStat,
+ const Descriptor &source, int64_t *stream, bool *pinned, bool hasStat,
const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
int stat{RTNAME(CUFAllocatableAllocateSync)(
alloc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
diff --git a/flang-rt/lib/cuda/allocator.cpp b/flang-rt/lib/cuda/allocator.cpp
index 51119ab251168..f4289c55bd8de 100644
--- a/flang-rt/lib/cuda/allocator.cpp
+++ b/flang-rt/lib/cuda/allocator.cpp
@@ -98,7 +98,7 @@ static unsigned findAllocation(void *ptr) {
return allocNotFound;
}
-static void insertAllocation(void *ptr, std::size_t size, std::int64_t stream) {
+static void insertAllocation(void *ptr, std::size_t size, cudaStream_t stream) {
CriticalSection critical{lock};
initAllocations();
if (numDeviceAllocations >= maxDeviceAllocations) {
@@ -106,7 +106,7 @@ static void insertAllocation(void *ptr, std::size_t size, std::int64_t stream) {
}
deviceAllocations[numDeviceAllocations].ptr = ptr;
deviceAllocations[numDeviceAllocations].size = size;
- deviceAllocations[numDeviceAllocations].stream = (cudaStream_t)stream;
+ deviceAllocations[numDeviceAllocations].stream = stream;
++numDeviceAllocations;
qsort(deviceAllocations, numDeviceAllocations, sizeof(DeviceAllocation),
compareDeviceAlloc);
@@ -136,7 +136,7 @@ void RTDEF(CUFRegisterAllocator)() {
}
void *CUFAllocPinned(
- std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) {
+ std::size_t sizeInBytes, [[maybe_unused]] std::int64_t *asyncObject) {
void *p;
CUDA_REPORT_IF_ERROR(cudaMallocHost((void **)&p, sizeInBytes));
return p;
@@ -144,18 +144,18 @@ void *CUFAllocPinned(
void CUFFreePinned(void *p) { CUDA_REPORT_IF_ERROR(cudaFreeHost(p)); }
-void *CUFAllocDevice(std::size_t sizeInBytes, std::int64_t asyncId) {
+void *CUFAllocDevice(std::size_t sizeInBytes, std::int64_t *asyncObject) {
void *p;
if (Fortran::runtime::executionEnvironment.cudaDeviceIsManaged) {
CUDA_REPORT_IF_ERROR(
cudaMallocManaged((void **)&p, sizeInBytes, cudaMemAttachGlobal));
} else {
- if (asyncId == kNoAsyncId) {
+ if (asyncObject == kNoAsyncObject) {
CUDA_REPORT_IF_ERROR(cudaMalloc(&p, sizeInBytes));
} else {
CUDA_REPORT_IF_ERROR(
- cudaMallocAsync(&p, sizeInBytes, (cudaStream_t)asyncId));
- insertAllocation(p, sizeInBytes, asyncId);
+ cudaMallocAsync(&p, sizeInBytes, (cudaStream_t)*asyncObject));
+ insertAllocation(p, sizeInBytes, (cudaStream_t)*asyncObject);
}
}
return p;
@@ -174,7 +174,7 @@ void CUFFreeDevice(void *p) {
}
void *CUFAllocManaged(
- std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) {
+ std::size_t sizeInBytes, [[maybe_unused]] std::int64_t *asyncObject) {
void *p;
CUDA_REPORT_IF_ERROR(
cudaMallocManaged((void **)&p, sizeInBytes, cudaMemAttachGlobal));
@@ -184,9 +184,9 @@ void *CUFAllocManaged(
void CUFFreeManaged(void *p) { CUDA_REPORT_IF_ERROR(cudaFree(p)); }
void *CUFAllocUnified(
- std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) {
+ std::size_t sizeInBytes, [[maybe_unused]] std::int64_t *asyncObject) {
// Call alloc managed for the time being.
- return CUFAllocManaged(sizeInBytes, asyncId);
+ return CUFAllocManaged(sizeInBytes, asyncObject);
}
void CUFFreeUnified(void *p) {
diff --git a/flang-rt/lib/cuda/descriptor.cpp b/flang-rt/lib/cuda/descriptor.cpp
index 175e8c0ef8438..7b768f91af29d 100644
--- a/flang-rt/lib/cuda/descriptor.cpp
+++ b/flang-rt/lib/cuda/descriptor.cpp
@@ -21,7 +21,7 @@ RT_EXT_API_GROUP_BEGIN
Descriptor *RTDEF(CUFAllocDescriptor)(
std::size_t sizeInBytes, const char *sourceFile, int sourceLine) {
return reinterpret_cast<Descriptor *>(
- CUFAllocManaged(sizeInBytes, /*asyncId*/ -1));
+ CUFAllocManaged(sizeInBytes, /*asyncObject=*/nullptr));
}
void RTDEF(CUFFreeDescriptor)(
diff --git a/flang-rt/lib/runtime/allocatable.cpp b/flang-rt/lib/runtime/allocatable.cpp
index 6acce34eb9a9e..ef18da6ea0786 100644
--- a/flang-rt/lib/runtime/allocatable.cpp
+++ b/flang-rt/lib/runtime/allocatable.cpp
@@ -133,17 +133,17 @@ void RTDEF(AllocatableApplyMold)(
}
}
-int RTDEF(AllocatableAllocate)(Descriptor &descriptor, std::int64_t asyncId,
- bool hasStat, const Descriptor *errMsg, const char *sourceFile,
- int sourceLine) {
+int RTDEF(AllocatableAllocate)(Descriptor &descriptor,
+ std::int64_t *asyncObject, bool hasStat, const Descriptor *errMsg,
+ const char *sourceFile, int sourceLine) {
Terminator terminator{sourceFile, sourceLine};
if (!descriptor.IsAllocatable()) {
return ReturnError(terminator, StatInvalidDescriptor, errMsg, hasStat);
} else if (descriptor.IsAllocated()) {
return ReturnError(terminator, StatBaseNotNull, errMsg, hasStat);
} else {
- int stat{
- ReturnError(terminator, descriptor.Allocate(asyncId), errMsg, hasStat)};
+ int stat{ReturnError(
+ terminator, descriptor.Allocate(asyncObject), errMsg, hasStat)};
if (stat == StatOk) {
if (const DescriptorAddendum * addendum{descriptor.Addendum()}) {
if (const auto *derived{addendum->derivedType()}) {
@@ -162,7 +162,7 @@ int RTDEF(AllocatableAllocateSource)(Descriptor &alloc,
const Descriptor &source, bool hasStat, const Descriptor *errMsg,
const char *sourceFile, int sourceLine) {
int stat{RTNAME(AllocatableAllocate)(
- alloc, /*asyncId=*/-1, hasStat, errMsg, sourceFile, sourceLine)};
+ alloc, /*asyncObject=*/nullptr, hasStat, errMsg, sourceFile, sourceLine)};
if (stat == StatOk) {
Terminator terminator{sourceFile, sourceLine};
DoFromSourceAssign(alloc, source, terminator);
diff --git a/flang-rt/lib/runtime/array-constructor.cpp b/flang-rt/lib/runtime/array-constructor.cpp
index 67b3b5e1e0f50..858fac7bf2b39 100644
--- a/flang-rt/lib/runtime/array-constructor.cpp
+++ b/flang-rt/lib/runtime/array-constructor.cpp
@@ -50,7 +50,7 @@ static RT_API_ATTRS void AllocateOrReallocateVectorIfNeeded(
initialAllocationSize(fromElements, to.ElementBytes())};
to.GetDimension(0).SetBounds(1, allocationSize);
RTNAME(AllocatableAllocate)
- (to, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr,
+ (to, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr,
vector.sourceFile, vector.sourceLine);
to.GetDimension(0).SetBounds(1, fromElements);
vector.actualAllocationSize = allocationSize;
@@ -59,7 +59,7 @@ static RT_API_ATTRS void AllocateOrReallocateVectorIfNeeded(
// first value: there should be no reallocation.
RUNTIME_CHECK(terminator, previousToElements >= fromElements);
RTNAME(AllocatableAllocate)
- (to, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr,
+ (to, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr,
vector.sourceFile, vector.sourceLine);
vector.actualAllocationSize = previousToElements;
}
diff --git a/flang-rt/lib/runtime/assign.cpp b/flang-rt/lib/runtime/assign.cpp
index 4a813cd489022..8a4fa36c91479 100644
--- a/flang-rt/lib/runtime/assign.cpp
+++ b/flang-rt/lib/runtime/assign.cpp
@@ -99,7 +99,7 @@ static RT_API_ATTRS int AllocateAssignmentLHS(
toDim.SetByteStride(stride);
stride *= toDim.Extent();
}
- int result{ReturnError(terminator, to.Allocate(kNoAsyncId))};
+ int result{ReturnError(terminator, to.Allocate(kNoAsyncObject))};
if (result == StatOk && derived && !derived->noInitializationNeeded()) {
result = ReturnError(terminator, Initialize(to, *derived, terminator));
}
@@ -277,7 +277,7 @@ RT_API_ATTRS void Assign(Descriptor &to, const Descriptor &from,
// entity, otherwise, the Deallocate() below will not
// free the descriptor memory.
newFrom.raw().attribute = CFI_attribute_allocatable;
- auto stat{ReturnError(terminator, newFrom.Allocate(kNoAsyncId))};
+ auto stat{ReturnError(terminator, newFrom.Allocate(kNoAsyncObject))};
if (stat == StatOk) {
if (HasDynamicComponent(from)) {
// If 'from' has allocatable/automatic component, we cannot
diff --git a/flang-rt/lib/runtime/character.cpp b/flang-rt/lib/runtime/character.cpp
index d1152ee1caefb..f140d202e118e 100644
--- a/flang-rt/lib/runtime/character.cpp
+++ b/flang-rt/lib/runtime/character.cpp
@@ -118,7 +118,7 @@ static RT_API_ATTRS void Compare(Descriptor &result, const Descriptor &x,
for (int j{0}; j < rank; ++j) {
result.GetDimension(j).SetBounds(1, ub[j]);
}
- if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
+ if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
terminator.Crash("Compare: could not allocate storage for result");
}
std::size_t xChars{x.ElementBytes() >> shift<CHAR>};
@@ -173,7 +173,7 @@ static RT_API_ATTRS void AdjustLRHelper(Descriptor &result,
for (int j{0}; j < rank; ++j) {
result.GetDimension(j).SetBounds(1, ub[j]);
}
- if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
+ if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
terminator.Crash("ADJUSTL/R: could not allocate storage for result");
}
for (SubscriptValue resultAt{0}; elements-- > 0;
@@ -227,7 +227,7 @@ static RT_API_ATTRS void LenTrim(Descriptor &result, const Descriptor &string,
for (int j{0}; j < rank; ++j) {
result.GetDimension(j).SetBounds(1, ub[j]);
}
- if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
+ if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
terminator.Crash("LEN_TRIM: could not allocate storage for result");
}
std::size_t stringElementChars{string.ElementBytes() >> shift<CHAR>};
@@ -427,7 +427,7 @@ static RT_API_ATTRS void GeneralCharFunc(Descriptor &result,
for (int j{0}; j < rank; ++j) {
result.GetDimension(j).SetBounds(1, ub[j]);
}
- if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
+ if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
terminator.Crash("SCAN/VERIFY: could not allocate storage for result");
}
std::size_t stringElementChars{string.ElementBytes() >> shift<CHAR>};
@@ -530,7 +530,8 @@ static RT_API_ATTRS void MaxMinHelper(Descriptor &accumulator,
for (int j{0}; j < rank; ++j) {
accumulator.GetDimension(j).SetBounds(1, ub[j]);
}
- RUNTIME_CHECK(terminator, accumulator.Allocate(kNoAsyncId) == CFI_SUCCESS);
+ RUNTIME_CHECK(
+ terminator, accumulator.Allocate(kNoAsyncObject) == CFI_SUCCESS);
}
for (CHAR *result{accumulator.OffsetElement<CHAR>()}; elements-- > 0;
accumData += accumChars, result += chars, x.IncrementSubscripts(xAt)) {
@@ -606,7 +607,7 @@ void RTDEF(CharacterConcatenate)(Descriptor &accumulator,
for (int j{0}; j < rank; ++j) {
accumulator.GetDimension(j).SetBounds(1, ub[j]);
}
- if (accumulator.Allocate(kNoAsyncId) != CFI_SUCCESS) {
+ if (accumulator.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
terminator.Crash(
"CharacterConcatenate: could not allocate storage for result");
}
@@ -629,7 +630,8 @@ void RTDEF(CharacterConcatenateScalar1)(
accumulator.set_base_addr(nullptr);
std::size_t oldLen{accumulator.ElementBytes()};
accumulator.raw().elem_len += chars;
- RUNTIME_CHECK(terminator, accumulator.Allocate(kNoAsyncId) == CFI_SUCCESS);
+ RUNTIME_CHECK(
+ terminator, accumulator.Allocate(kNoAsyncObject) == CFI_SUCCESS);
std::memcpy(accumulator.OffsetElement<char>(oldLen), from, chars);
FreeMemory(old);
}
@@ -831,7 +833,7 @@ void RTDEF(Repeat)(Descriptor &result, const Descriptor &string,
std::size_t origBytes{string.ElementBytes()};
result.Establish(string.type(), origBytes * ncopies, nullptr, 0, nullptr,
CFI_attribute_allocatable);
- if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
+ if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
terminator.Crash("REPEAT could not allocate storage for result");
}
const char *from{string.OffsetElement()};
@@ -865,7 +867,7 @@ void RTDEF(Trim)(Descriptor &result, const Descriptor &string,
}
result.Establish(string.type(), resultBytes, nullptr, 0, nullptr,
CFI_attribute_allocatable);
- RUNTIME_CHECK(terminator, result.Allocate(kNoAsyncId) == CFI_SUCCESS);
+ RUNTIME_CHECK(terminator, result.Allocate(kNoAsyncObject) == CFI_SUCCESS);
std::memcpy(result.OffsetElement(), string.OffsetElement(), resultBytes);
}
diff --git a/flang-rt/lib/runtime/copy.cpp b/flang-rt/lib/runtime/copy.cpp
index 3a0f98cf8d376..f990f46e0be66 100644
--- a/flang-rt/lib/runtime/copy.cpp
+++ b/flang-rt/lib/runtime/copy.cpp
@@ -171,8 +171,8 @@ RT_API_ATTRS void CopyElement(const Descriptor &to, const SubscriptValue toAt[],
*reinterpret_cast<Descriptor *>(toPtr + component->offset())};
if (toDesc.raw().base_addr != nullptr) {
toDesc.set_base_addr(nullptr);
- RUNTIME_CHECK(
- terminator, toDesc.Allocate(/*asyncId=*/-1) == CFI_SUCCESS);
+ RUNTIME_CHECK(terminator,
+ toDesc.Allocate(/*asyncObject=*/nullptr) == CFI_SUCCESS);
const Descriptor &fromDesc{*reinterpret_cast<const Descriptor *>(
fromPtr + component->offset())};
copyStack.emplace(toDesc, fromDesc);
diff --git a/flang-rt/lib/runtime/derived.cpp b/flang-rt/lib/runtime/derived.cpp
index c46ea806a430a..35037036f63e7 100644
--- a/flang-rt/lib/runtime/derived.cpp
+++ b/flang-rt/lib/runtime/derived.cpp
@@ -52,7 +52,7 @@ RT_API_ATTRS int Initialize(const Descriptor &instance,
allocDesc.raw().attribute = CFI_attribute_allocatable;
if (comp.genre() == typeInfo::Component::Genre::Automatic) {
stat = ReturnError(
- terminator, allocDesc.Allocate(kNoAsyncId), errMsg, hasStat);
+ terminator, allocDesc.Allocate(kNoAsyncObject), errMsg, hasStat);
if (stat == StatOk) {
if (const DescriptorAddendum * addendum{allocDesc.Addendum()}) {
if (const auto *derived{addendum->derivedType()}) {
@@ -153,7 +153,7 @@ RT_API_ATTRS int InitializeClone(const Descriptor &clone,
if (origDesc.IsAllocated()) {
cloneDesc.ApplyMold(origDesc, origDesc.rank());
stat = ReturnError(
- terminator, cloneDesc.Allocate(kNoAsyncId), errMsg, hasStat);
+ terminator, cloneDesc.Allocate(kNoAsyncObject), errMsg, hasStat);
if (stat == StatOk) {
if (const DescriptorAddendum * addendum{cloneDesc.Addendum()}) {
if (const typeInfo::DerivedType *
@@ -260,7 +260,7 @@ static RT_API_ATTRS void CallFinalSubroutine(const Descriptor &descriptor,
copy.raw().attribute = CFI_attribute_allocatable;
Terminator stubTerminator{"CallFinalProcedure() in Fortran runtime", 0};
RUNTIME_CHECK(terminator ? *terminator : stubTerminator,
- copy.Allocate(kNoAsyncId) == CFI_SUCCESS);
+ copy.Allocate(kNoAsyncObject) == CFI_SUCCESS);
ShallowCopyDiscontiguousToContiguous(copy, descriptor);
argDescriptor = ©
}
diff --git a/flang-rt/lib/runtime/descriptor.cpp b/flang-rt/lib/runtime/descriptor.cpp
index 3debf53bb5290..67336d01380e0 100644
--- a/flang-rt/lib/runtime/descriptor.cpp
+++ b/flang-rt/lib/runtime/descriptor.cpp
@@ -158,7 +158,7 @@ RT_API_ATTRS static inline int MapAllocIdx(const Descriptor &desc) {
#endif
}
-RT_API_ATTRS int Descriptor::Allocate(std::int64_t asyncId) {
+RT_API_ATTRS int Descriptor::Allocate(std::int64_t *asyncObject) {
std::size_t elementBytes{ElementBytes()};
if (static_cast<std::int64_t>(elementBytes) < 0) {
// F'2023 7.4.4.2 p5: "If the character length parameter value evaluates
@@ -170,7 +170,7 @@ RT_API_ATTRS int Descriptor::Allocate(std::int64_t asyncId) {
// Zero size allocation is possible in Fortran and the resulting
// descriptor must be allocated/associated. Since std::malloc(0)
// result is implementation defined, always allocate at least one byte.
- void *p{alloc(byteSize ? byteSize : 1, asyncId)};
+ void *p{alloc(byteSize ? byteSize : 1, asyncObject)};
if (!p) {
return CFI_ERROR_MEM_ALLOCATION;
}
diff --git a/flang-rt/lib/runtime/extrema.cpp b/flang-rt/lib/runtime/extrema.cpp
index 4c7f8e8b99e8f..03e574a8fbff1 100644
--- a/flang-rt/lib/runtime/extrema.cpp
+++ b/flang-rt/lib/runtime/extrema.cpp
@@ -152,7 +152,7 @@ inline RT_API_ATTRS void CharacterMaxOrMinLoc(const char *intrinsic,
CFI_attribute_allocatable);
result.GetDimension(0).SetBounds(1, extent[0]);
Terminator terminator{source, line};
- if (int stat{result.Allocate(kNoAsyncId)}) {
+ if (int stat{result.Allocate(kNoAsyncObject)}) {
terminator.Crash(
"%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
}
@@ -181,7 +181,7 @@ inline RT_API_ATTRS void TotalNumericMaxOrMinLoc(const char *intrinsic,
CFI_attribute_allocatable);
result.GetDimension(0).SetBounds(1, extent[0]);
Terminator terminator{source, line};
- if (int stat{result.Allocate(kNoAsyncId)}) {
+ if (int stat{result.Allocate(kNoAsyncObject)}) {
terminator.Crash(
"%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
}
diff --git a/flang-rt/lib/runtime/findloc.cpp b/flang-rt/lib/runtime/findloc.cpp
index e3e98953b0cfc..5485f4b97bd2f 100644
--- a/flang-rt/lib/runtime/findloc.cpp
+++ b/flang-rt/lib/runtime/findloc.cpp
@@ -220,7 +220,7 @@ void RTDEF(Findloc)(Descriptor &result, const Descriptor &x,
CFI_attribute_allocatable);
result.GetDimension(0).SetBounds(1, extent[0]);
Terminator terminator{source, line};
- if (int stat{result.Allocate(kNoAsyncId)}) {
+ if (int stat{result.Allocate(kNoAsyncObject)}) {
terminator.Crash(
"FINDLOC: could not allocate memory for result; STAT=%d", stat);
}
diff --git a/flang-rt/lib/runtime/matmul-transpose.cpp b/flang-rt/lib/runtime/matmul-transpose.cpp
index 17987fb73d943..c9e21502b629e 100644
--- a/flang-rt/lib/runtime/matmul-transpose.cpp
+++ b/flang-rt/lib/runtime/matmul-transpose.cpp
@@ -183,7 +183,7 @@ inline static RT_API_ATTRS void DoMatmulTranspose(
for (int j{0}; j < resRank; ++j) {
result.GetDimension(j).SetBounds(1, extent[j]);
}
- if (int stat{result.Allocate(kNoAsyncId)}) {
+ if (int stat{result.Allocate(kNoAsyncObject)}) {
terminator.Crash(
"MATMUL-TRANSPOSE: could not allocate memory for result; STAT=%d",
stat);
diff --git a/flang-rt/lib/runtime/matmul.cpp b/flang-rt/lib/runtime/matmul.cpp
index 0ff92cecbbcb8..5acb345725212 100644
--- a/flang-rt/lib/runtime/matmul.cpp
+++ b/flang-rt/lib/runtime/matmul.cpp
@@ -255,7 +255,7 @@ static inline RT_API_ATTRS void DoMatmul(
for (int j{0}; j < resRank; ++j) {
result.GetDimension(j).SetBounds(1, extent[j]);
}
- if (int stat{result.Allocate(kNoAsyncId)}) {
+ if (int stat{result.Allocate(kNoAsyncObject)}) {
terminator.Crash(
"MATMUL: could not allocate memory for result; STAT=%d", stat);
}
diff --git a/flang-rt/lib/runtime/misc-intrinsic.cpp b/flang-rt/lib/runtime/misc-intrinsic.cpp
index 2fde859869ef0..a8797f48fa667 100644
--- a/flang-rt/lib/runtime/misc-intrinsic.cpp
+++ b/flang-rt/lib/runtime/misc-intrinsic.cpp
@@ -30,7 +30,7 @@ static RT_API_ATTRS void TransferImpl(Descriptor &result,
if (const DescriptorAddendum * addendum{mold.Addendum()}) {
*result.Addendum() = *addendum;
}
- if (int stat{result.Allocate(kNoAsyncId)}) {
+ if (int stat{result.Allocate(kNoAsyncObject)}) {
Terminator{sourceFile, line}.Crash(
"TRANSFER: could not allocate memory for result; STAT=%d", stat);
}
diff --git a/flang-rt/lib/runtime/pointer.cpp b/flang-rt/lib/runtime/pointer.cpp
index fd2427f4124b5..7331f7bbc3a75 100644
--- a/flang-rt/lib/runtime/pointer.cpp
+++ b/flang-rt/lib/runtime/pointer.cpp
@@ -129,7 +129,7 @@ RT_API_ATTRS void *AllocateValidatedPointerPayload(
byteSize = ((byteSize + align - 1) / align) * align;
std::size_t total{byteSize + sizeof(std::uintptr_t)};
AllocFct alloc{allocatorRegistry.GetAllocator(allocatorIdx)};
- void *p{alloc(total, /*asyncId=*/-1)};
+ void *p{alloc(total, /*asyncObject=*/nullptr)};
if (p && allocatorIdx == 0) {
// Fill the footer word with the XOR of the ones' complement of
// the base address, which is a value that would be highly unlikely
diff --git a/flang-rt/lib/runtime/temporary-stack.cpp b/flang-rt/lib/runtime/temporary-stack.cpp
index 3a952b1fdbcca..3f6fd8ee15a80 100644
--- a/flang-rt/lib/runtime/temporary-stack.cpp
+++ b/flang-rt/lib/runtime/temporary-stack.cpp
@@ -148,7 +148,7 @@ void DescriptorStorage<COPY_VALUES>::push(const Descriptor &source) {
if constexpr (COPY_VALUES) {
// copy the data pointed to by the box
box.set_base_addr(nullptr);
- box.Allocate(kNoAsyncId);
+ box.Allocate(kNoAsyncObject);
RTNAME(AssignTemporary)
(box, source, terminator_.sourceFileName(), terminator_.sourceLine());
}
diff --git a/flang-rt/lib/runtime/tools.cpp b/flang-rt/lib/runtime/tools.cpp
index 5d6e35faca70a..1f965b0b151ce 100644
--- a/flang-rt/lib/runtime/tools.cpp
+++ b/flang-rt/lib/runtime/tools.cpp
@@ -261,7 +261,7 @@ RT_API_ATTRS void CreatePartialReductionResult(Descriptor &result,
for (int j{0}; j + 1 < xRank; ++j) {
result.GetDimension(j).SetBounds(1, resultExtent[j]);
}
- if (int stat{result.Allocate(kNoAsyncId)}) {
+ if (int stat{result.Allocate(kNoAsyncObject)}) {
terminator.Crash(
"%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
}
diff --git a/flang-rt/lib/runtime/transformational.cpp b/flang-rt/lib/runtime/transformational.cpp
index a7d5a48530ee9..3df314a4e966b 100644
--- a/flang-rt/lib/runtime/transformational.cpp
+++ b/flang-rt/lib/runtime/transformational.cpp
@@ -132,7 +132,7 @@ static inline RT_API_ATTRS std::size_t AllocateResult(Descriptor &result,
for (int j{0}; j < rank; ++j) {
result.GetDimension(j).SetBounds(1, extent[j]);
}
- if (int stat{result.Allocate(kNoAsyncId)}) {
+ if (int stat{result.Allocate(kNoAsyncObject)}) {
terminator.Crash(
"%s: Could not allocate memory for result (stat=%d)", function, stat);
}
@@ -157,7 +157,7 @@ static inline RT_API_ATTRS std::size_t AllocateBesselResult(Descriptor &result,
for (int j{0}; j < rank; ++j) {
result.GetDimension(j).SetBounds(1, extent[j]);
}
- if (int stat{result.Allocate(kNoAsyncId)}) {
+ if (int stat{result.Allocate(kNoAsyncObject)}) {
terminator.Crash(
"%s: Could not allocate memory for result (stat=%d)", function, stat);
}
diff --git a/flang-rt/unittests/Evaluate/reshape.cpp b/flang-rt/unittests/Evaluate/reshape.cpp
index 67a0be124e8e0..f84de443965d1 100644
--- a/flang-rt/unittests/Evaluate/reshape.cpp
+++ b/flang-rt/unittests/Evaluate/reshape.cpp
@@ -26,7 +26,7 @@ int main() {
for (int j{0}; j < 3; ++j) {
source->GetDimension(j).SetBounds(1, sourceExtent[j]);
}
- TEST(source->Allocate(kNoAsyncId) == CFI_SUCCESS);
+ TEST(source->Allocate(kNoAsyncObject) == CFI_SUCCESS);
TEST(source->IsAllocated());
MATCH(2, source->GetDimension(0).Extent());
MATCH(3, source->GetDimension(1).Extent());
diff --git a/flang-rt/unittests/Runtime/Allocatable.cpp b/flang-rt/unittests/Runtime/Allocatable.cpp
index a6fcdd0d1423c..b394312e5bc5a 100644
--- a/flang-rt/unittests/Runtime/Allocatable.cpp
+++ b/flang-rt/unittests/Runtime/Allocatable.cpp
@@ -26,7 +26,7 @@ TEST(AllocatableTest, MoveAlloc) {
auto b{createAllocatable(TypeCategory::Integer, 4)};
// ALLOCATE(a(20))
a->GetDimension(0).SetBounds(1, 20);
- a->Allocate(kNoAsyncId);
+ a->Allocate(kNoAsyncObject);
EXPECT_TRUE(a->IsAllocated());
EXPECT_FALSE(b->IsAllocated());
@@ -46,7 +46,7 @@ TEST(AllocatableTest, MoveAlloc) {
// move_alloc with errMsg
auto errMsg{Descriptor::Create(
sizeof(char), 64, nullptr, 0, nullptr, CFI_attribute_allocatable)};
- errMsg->Allocate(kNoAsyncId);
+ errMsg->Allocate(kNoAsyncObject);
RTNAME(MoveAlloc)(*b, *a, nullptr, false, errMsg.get(), __FILE__, __LINE__);
EXPECT_FALSE(a->IsAllocated());
EXPECT_TRUE(b->IsAllocated());
diff --git a/flang-rt/unittests/Runtime/CUDA/Allocatable.cpp b/flang-rt/unittests/Runtime/CUDA/Allocatable.cpp
index 89649aa95ad93..9935ae0eaac2f 100644
--- a/flang-rt/unittests/Runtime/CUDA/Allocatable.cpp
+++ b/flang-rt/unittests/Runtime/CUDA/Allocatable.cpp
@@ -42,7 +42,8 @@ TEST(AllocatableCUFTest, SimpleDeviceAllocatable) {
CUDA_REPORT_IF_ERROR(cudaMalloc(&device_desc, a->SizeInBytes()));
RTNAME(AllocatableAllocate)
- (*a, kNoAsyncId, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
+ (*a, kNoAsyncObject, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
+ __LINE__);
EXPECT_TRUE(a->IsAllocated());
RTNAME(CUFDescriptorSync)(device_desc, a.get(), __FILE__, __LINE__);
cudaDeviceSynchronize();
@@ -82,19 +83,22 @@ TEST(AllocatableCUFTest, StreamDeviceAllocatable) {
RTNAME(AllocatableSetBounds)(*c, 0, 1, 100);
RTNAME(AllocatableAllocate)
- (*a, 1, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
+ (*a, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
+ __LINE__);
EXPECT_TRUE(a->IsAllocated());
cudaDeviceSynchronize();
EXPECT_EQ(cudaSuccess, cudaGetLastError());
RTNAME(AllocatableAllocate)
- (*b, 1, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
+ (*b, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
+ __LINE__);
EXPECT_TRUE(b->IsAllocated());
cudaDeviceSynchronize();
EXPECT_EQ(cudaSuccess, cudaGetLastError());
RTNAME(AllocatableAllocate)
- (*c, 1, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
+ (*c, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
+ __LINE__);
EXPECT_TRUE(c->IsAllocated());
cudaDeviceSynchronize();
EXPECT_EQ(cudaSuccess, cudaGetLastError());
diff --git a/flang-rt/unittests/Runtime/CUDA/AllocatorCUF.cpp b/flang-rt/unittests/Runtime/CUDA/AllocatorCUF.cpp
index 2f1dc64dc8c5a..f1f931e87a86e 100644
--- a/flang-rt/unittests/Runtime/CUDA/AllocatorCUF.cpp
+++ b/flang-rt/unittests/Runtime/CUDA/AllocatorCUF.cpp
@@ -35,7 +35,7 @@ TEST(AllocatableCUFTest, SimpleDeviceAllocate) {
EXPECT_FALSE(a->HasAddendum());
RTNAME(AllocatableSetBounds)(*a, 0, 1, 10);
RTNAME(AllocatableAllocate)
- (*a, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
+ (*a, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
__LINE__);
EXPECT_TRUE(a->IsAllocated());
RTNAME(AllocatableDeallocate)
@@ -54,7 +54,7 @@ TEST(AllocatableCUFTest, SimplePinnedAllocate) {
EXPECT_FALSE(a->HasAddendum());
RTNAME(AllocatableSetBounds)(*a, 0, 1, 10);
RTNAME(AllocatableAllocate)
- (*a, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
+ (*a, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
__LINE__);
EXPECT_TRUE(a->IsAllocated());
RTNAME(AllocatableDeallocate)
diff --git a/flang-rt/unittests/Runtime/CUDA/Memory.cpp b/flang-rt/unittests/Runtime/CUDA/Memory.cpp
index b3612073657ab..7915baca6c203 100644
--- a/flang-rt/unittests/Runtime/CUDA/Memory.cpp
+++ b/flang-rt/unittests/Runtime/CUDA/Memory.cpp
@@ -50,8 +50,8 @@ TEST(MemoryCUFTest, CUFDataTransferDescDesc) {
EXPECT_EQ((int)kDeviceAllocatorPos, dev->GetAllocIdx());
RTNAME(AllocatableSetBounds)(*dev, 0, 1, 10);
RTNAME(AllocatableAllocate)
- (*dev, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
- __LINE__);
+ (*dev, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr,
+ __FILE__, __LINE__);
EXPECT_TRUE(dev->IsAllocated());
// Create temp array to transfer to device.
diff --git a/flang-rt/unittests/Runtime/CharacterTest.cpp b/flang-rt/unittests/Runtime/CharacterTest.cpp
index 0f28e883671bc..2c7af27b9da77 100644
--- a/flang-rt/unittests/Runtime/CharacterTest.cpp
+++ b/flang-rt/unittests/Runtime/CharacterTest.cpp
@@ -35,7 +35,7 @@ OwningPtr<Descriptor> CreateDescriptor(const std::vector<SubscriptValue> &shape,
for (int j{0}; j < rank; ++j) {
descriptor->GetDimension(j).SetBounds(2, shape[j] + 1);
}
- if (descriptor->Allocate(kNoAsyncId) != 0) {
+ if (descriptor->Allocate(kNoAsyncObject) != 0) {
return nullptr;
}
diff --git a/flang-rt/unittests/Runtime/CommandTest.cpp b/flang-rt/unittests/Runtime/CommandTest.cpp
index 9d0da4ce8dd4e..6919a98105b8a 100644
--- a/flang-rt/unittests/Runtime/CommandTest.cpp
+++ b/flang-rt/unittests/Runtime/CommandTest.cpp
@@ -26,7 +26,7 @@ template <std::size_t n = 64>
static OwningPtr<Descriptor> CreateEmptyCharDescriptor() {
OwningPtr<Descriptor> descriptor{Descriptor::Create(
sizeof(char), n, nullptr, 0, nullptr, CFI_attribute_allocatable)};
- if (descriptor->Allocate(kNoAsyncId) != 0) {
+ if (descriptor->Allocate(kNoAsyncObject) != 0) {
return nullptr;
}
return descriptor;
@@ -36,7 +36,7 @@ static OwningPtr<Descriptor> CharDescriptor(const char *value) {
std::size_t n{std::strlen(value)};
OwningPtr<Descriptor> descriptor{Descriptor::Create(
sizeof(char), n, nullptr, 0, nullptr, CFI_attribute_allocatable)};
- if (descriptor->Allocate(kNoAsyncId) != 0) {
+ if (descriptor->Allocate(kNoAsyncObject) != 0) {
return nullptr;
}
std::memcpy(descriptor->OffsetElement(), value, n);
@@ -47,7 +47,7 @@ template <int kind = sizeof(std::int64_t)>
static OwningPtr<Descriptor> EmptyIntDescriptor() {
OwningPtr<Descriptor> descriptor{Descriptor::Create(TypeCategory::Integer,
kind, nullptr, 0, nullptr, CFI_attribute_allocatable)};
- if (descriptor->Allocate(kNoAsyncId) != 0) {
+ if (descriptor->Allocate(kNoAsyncObject) != 0) {
return nullptr;
}
return descriptor;
@@ -57,7 +57,7 @@ template <int kind = sizeof(std::int64_t)>
static OwningPtr<Descriptor> IntDescriptor(const int &value) {
OwningPtr<Descriptor> descriptor{Descriptor::Create(TypeCategory::Integer,
kind, nullptr, 0, nullptr, CFI_attribute_allocatable)};
- if (descriptor->Allocate(kNoAsyncId) != 0) {
+ if (descriptor->Allocate(kNoAsyncObject) != 0) {
return nullptr;
}
std::memcpy(descriptor->OffsetElement<int>(), &value, sizeof(int));
diff --git a/flang-rt/unittests/Runtime/TemporaryStack.cpp b/flang-rt/unittests/Runtime/TemporaryStack.cpp
index 3291794f22fc1..65725840459ab 100644
--- a/flang-rt/unittests/Runtime/TemporaryStack.cpp
+++ b/flang-rt/unittests/Runtime/TemporaryStack.cpp
@@ -59,7 +59,7 @@ TEST(TemporaryStack, ValueStackBasic) {
Descriptor &outputDesc2{testDescriptorStorage[2].descriptor()};
inputDesc.Establish(code, elementBytes, descriptorPtr, rank, extent);
- inputDesc.Allocate(kNoAsyncId);
+ inputDesc.Allocate(kNoAsyncObject);
ASSERT_EQ(inputDesc.IsAllocated(), true);
uint32_t *inputData = static_cast<uint32_t *>(inputDesc.raw().base_addr);
for (std::size_t i = 0; i < inputDesc.Elements(); ++i) {
@@ -123,7 +123,7 @@ TEST(TemporaryStack, ValueStackMultiSize) {
boxDims.extent = extent[dim];
boxDims.sm = elementBytes;
}
- desc->Allocate(kNoAsyncId);
+ desc->Allocate(kNoAsyncObject);
// fill the array with some data to test
for (uint32_t i = 0; i < desc->Elements(); ++i) {
diff --git a/flang-rt/unittests/Runtime/tools.h b/flang-rt/unittests/Runtime/tools.h
index a1eba45647a80..4ada862df110b 100644
--- a/flang-rt/unittests/Runtime/tools.h
+++ b/flang-rt/unittests/Runtime/tools.h
@@ -42,7 +42,7 @@ static OwningPtr<Descriptor> MakeArray(const std::vector<int> &shape,
for (int j{0}; j < rank; ++j) {
result->GetDimension(j).SetBounds(1, shape[j]);
}
- int stat{result->Allocate(kNoAsyncId)};
+ int stat{result->Allocate(kNoAsyncObject)};
EXPECT_EQ(stat, 0) << stat;
EXPECT_LE(data.size(), result->Elements());
char *p{result->OffsetElement<char>()};
diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
index 46cc59cda1612..e38738230ffbc 100644
--- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
+++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
@@ -95,12 +95,11 @@ def cuf_AllocateOp : cuf_Op<"allocate", [AttrSizedOperandSegments,
}];
let arguments = (ins Arg<fir_ReferenceType, "", [MemRead, MemWrite]>:$box,
- Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$errmsg,
- Optional<AnyIntegerType>:$stream,
- Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$pinned,
- Arg<Optional<AnyRefOrBoxType>, "", [MemRead]>:$source,
- cuf_DataAttributeAttr:$data_attr,
- UnitAttr:$hasStat);
+ Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$errmsg,
+ Optional<fir_ReferenceType>:$stream,
+ Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$pinned,
+ Arg<Optional<AnyRefOrBoxType>, "", [MemRead]>:$source,
+ cuf_DataAttributeAttr:$data_attr, UnitAttr:$hasStat);
let results = (outs AnyIntegerType:$stat);
diff --git a/flang/include/flang/Runtime/CUDA/allocatable.h b/flang/include/flang/Runtime/CUDA/allocatable.h
index 822f2d4a2b297..6c97afa9e10e8 100644
--- a/flang/include/flang/Runtime/CUDA/allocatable.h
+++ b/flang/include/flang/Runtime/CUDA/allocatable.h
@@ -17,14 +17,14 @@ namespace Fortran::runtime::cuda {
extern "C" {
/// Perform allocation of the descriptor.
-int RTDECL(CUFAllocatableAllocate)(Descriptor &, int64_t stream = -1,
+int RTDECL(CUFAllocatableAllocate)(Descriptor &, int64_t *stream = nullptr,
bool *pinned = nullptr, bool hasStat = false,
const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
int sourceLine = 0);
/// Perform allocation of the descriptor with synchronization of it when
/// necessary.
-int RTDECL(CUFAllocatableAllocateSync)(Descriptor &, int64_t stream = -1,
+int RTDECL(CUFAllocatableAllocateSync)(Descriptor &, int64_t *stream = nullptr,
bool *pinned = nullptr, bool hasStat = false,
const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
int sourceLine = 0);
@@ -32,14 +32,14 @@ int RTDECL(CUFAllocatableAllocateSync)(Descriptor &, int64_t stream = -1,
/// Perform allocation of the descriptor without synchronization. Assign data
/// from source.
int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc,
- const Descriptor &source, int64_t stream = -1, bool *pinned = nullptr,
+ const Descriptor &source, int64_t *stream = nullptr, bool *pinned = nullptr,
bool hasStat = false, const Descriptor *errMsg = nullptr,
const char *sourceFile = nullptr, int sourceLine = 0);
/// Perform allocation of the descriptor with synchronization of it when
/// necessary. Assign data from source.
int RTDEF(CUFAllocatableAllocateSourceSync)(Descriptor &alloc,
- const Descriptor &source, int64_t stream = -1, bool *pinned = nullptr,
+ const Descriptor &source, int64_t *stream = nullptr, bool *pinned = nullptr,
bool hasStat = false, const Descriptor *errMsg = nullptr,
const char *sourceFile = nullptr, int sourceLine = 0);
diff --git a/flang/include/flang/Runtime/CUDA/allocator.h b/flang/include/flang/Runtime/CUDA/allocator.h
index 18ddf75ac3852..59fdb22b6e663 100644
--- a/flang/include/flang/Runtime/CUDA/allocator.h
+++ b/flang/include/flang/Runtime/CUDA/allocator.h
@@ -20,16 +20,16 @@ extern "C" {
void RTDECL(CUFRegisterAllocator)();
}
-void *CUFAllocPinned(std::size_t, std::int64_t);
+void *CUFAllocPinned(std::size_t, std::int64_t *);
void CUFFreePinned(void *);
-void *CUFAllocDevice(std::size_t, std::int64_t);
+void *CUFAllocDevice(std::size_t, std::int64_t *);
void CUFFreeDevice(void *);
-void *CUFAllocManaged(std::size_t, std::int64_t);
+void *CUFAllocManaged(std::size_t, std::int64_t *);
void CUFFreeManaged(void *);
-void *CUFAllocUnified(std::size_t, std::int64_t);
+void *CUFAllocUnified(std::size_t, std::int64_t *);
void CUFFreeUnified(void *);
} // namespace Fortran::runtime::cuda
diff --git a/flang/include/flang/Runtime/CUDA/pointer.h b/flang/include/flang/Runtime/CUDA/pointer.h
index 7fbd8f8e061f2..bdfc3268e0814 100644
--- a/flang/include/flang/Runtime/CUDA/pointer.h
+++ b/flang/include/flang/Runtime/CUDA/pointer.h
@@ -17,14 +17,14 @@ namespace Fortran::runtime::cuda {
extern "C" {
/// Perform allocation of the descriptor.
-int RTDECL(CUFPointerAllocate)(Descriptor &, int64_t stream = -1,
+int RTDECL(CUFPointerAllocate)(Descriptor &, int64_t *stream = nullptr,
bool *pinned = nullptr, bool hasStat = false,
const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
int sourceLine = 0);
/// Perform allocation of the descriptor with synchronization of it when
/// necessary.
-int RTDECL(CUFPointerAllocateSync)(Descriptor &, int64_t stream = -1,
+int RTDECL(CUFPointerAllocateSync)(Descriptor &, int64_t *stream = nullptr,
bool *pinned = nullptr, bool hasStat = false,
const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
int sourceLine = 0);
@@ -32,14 +32,14 @@ int RTDECL(CUFPointerAllocateSync)(Descriptor &, int64_t stream = -1,
/// Perform allocation of the descriptor without synchronization. Assign data
/// from source.
int RTDEF(CUFPointerAllocateSource)(Descriptor &pointer,
- const Descriptor &source, int64_t stream = -1, bool *pinned = nullptr,
+ const Descriptor &source, int64_t *stream = nullptr, bool *pinned = nullptr,
bool hasStat = false, const Descriptor *errMsg = nullptr,
const char *sourceFile = nullptr, int sourceLine = 0);
/// Perform allocation of the descriptor with synchronization of it when
/// necessary. Assign data from source.
int RTDEF(CUFPointerAllocateSourceSync)(Descriptor &pointer,
- const Descriptor &source, int64_t stream = -1, bool *pinned = nullptr,
+ const Descriptor &source, int64_t *stream = nullptr, bool *pinned = nullptr,
bool hasStat = false, const Descriptor *errMsg = nullptr,
const char *sourceFile = nullptr, int sourceLine = 0);
diff --git a/flang/include/flang/Runtime/allocatable.h b/flang/include/flang/Runtime/allocatable.h
index 6895f8af5e2a8..863c07494e7c3 100644
--- a/flang/include/flang/Runtime/allocatable.h
+++ b/flang/include/flang/Runtime/allocatable.h
@@ -94,9 +94,10 @@ int RTDECL(AllocatableCheckLengthParameter)(Descriptor &,
// Successfully allocated memory is initialized if the allocatable has a
// derived type, and is always initialized by AllocatableAllocateSource().
// Performs all necessary coarray synchronization and validation actions.
-int RTDECL(AllocatableAllocate)(Descriptor &, std::int64_t asyncId = -1,
- bool hasStat = false, const Descriptor *errMsg = nullptr,
- const char *sourceFile = nullptr, int sourceLine = 0);
+int RTDECL(AllocatableAllocate)(Descriptor &,
+ std::int64_t *asyncObject = nullptr, bool hasStat = false,
+ const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
+ int sourceLine = 0);
int RTDECL(AllocatableAllocateSource)(Descriptor &, const Descriptor &source,
bool hasStat = false, const Descriptor *errMsg = nullptr,
const char *sourceFile = nullptr, int sourceLine = 0);
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index 8d0444a6e5bd4..af8169c8e7f7b 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -773,7 +773,7 @@ class AllocateStmtHelper {
mlir::Value errmsg = errMsgExpr ? errorManager.errMsgAddr : nullptr;
mlir::Value stream =
streamExpr
- ? fir::getBase(converter.genExprValue(loc, *streamExpr, stmtCtx))
+ ? fir::getBase(converter.genExprAddr(loc, *streamExpr, stmtCtx))
: nullptr;
mlir::Value pinned =
pinnedExpr
diff --git a/flang/lib/Optimizer/Builder/Runtime/Allocatable.cpp b/flang/lib/Optimizer/Builder/Runtime/Allocatable.cpp
index 28452d3b486da..cd5f1f6d098c3 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Allocatable.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Allocatable.cpp
@@ -76,8 +76,7 @@ void fir::runtime::genAllocatableAllocate(fir::FirOpBuilder &builder,
mlir::func::FuncOp func{
fir::runtime::getRuntimeFunc<mkRTKey(AllocatableAllocate)>(loc, builder)};
mlir::FunctionType fTy{func.getFunctionType()};
- mlir::Value asyncId =
- builder.createIntegerConstant(loc, builder.getI64Type(), -1);
+ mlir::Value asyncObject = builder.createNullConstant(loc);
mlir::Value sourceFile{fir::factory::locationToFilename(builder, loc)};
mlir::Value sourceLine{
fir::factory::locationToLineNo(builder, loc, fTy.getInput(5))};
@@ -88,7 +87,7 @@ void fir::runtime::genAllocatableAllocate(fir::FirOpBuilder &builder,
errMsg = builder.create<fir::AbsentOp>(loc, boxNoneTy).getResult();
}
llvm::SmallVector<mlir::Value> args{
- fir::runtime::createArguments(builder, loc, fTy, desc, asyncId, hasStat,
- errMsg, sourceFile, sourceLine)};
+ fir::runtime::createArguments(builder, loc, fTy, desc, asyncObject,
+ hasStat, errMsg, sourceFile, sourceLine)};
builder.create<fir::CallOp>(loc, func, args);
}
diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
index 24033bc15b8eb..687007d957225 100644
--- a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
+++ b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
@@ -76,6 +76,16 @@ llvm::LogicalResult cuf::FreeOp::verify() { return checkCudaAttr(*this); }
// AllocateOp
//===----------------------------------------------------------------------===//
+template <typename OpTy>
+static llvm::LogicalResult checkStreamType(OpTy op) {
+ if (!op.getStream())
+ return mlir::success();
+ if (auto refTy = mlir::dyn_cast<fir::ReferenceType>(op.getStream().getType()))
+ if (!refTy.getEleTy().isInteger(64))
+ return op.emitOpError("stream is expected to be an i64 reference");
+ return mlir::success();
+}
+
llvm::LogicalResult cuf::AllocateOp::verify() {
if (getPinned() && getStream())
return emitOpError("pinned and stream cannot appears at the same time");
@@ -92,7 +102,7 @@ llvm::LogicalResult cuf::AllocateOp::verify() {
"expect errmsg to be a reference to/or a box type value");
if (getErrmsg() && !getHasStat())
return emitOpError("expect stat attribute when errmsg is provided");
- return mlir::success();
+ return checkStreamType(*this);
}
//===----------------------------------------------------------------------===//
@@ -143,16 +153,6 @@ llvm::LogicalResult cuf::DeallocateOp::verify() {
// KernelLaunchOp
//===----------------------------------------------------------------------===//
-template <typename OpTy>
-static llvm::LogicalResult checkStreamType(OpTy op) {
- if (!op.getStream())
- return mlir::success();
- if (auto refTy = mlir::dyn_cast<fir::ReferenceType>(op.getStream().getType()))
- if (!refTy.getEleTy().isInteger(64))
- return op.emitOpError("stream is expected to be an i64 reference");
- return mlir::success();
-}
-
llvm::LogicalResult cuf::KernelLaunchOp::verify() {
return checkStreamType(*this);
}
diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
index e70ceb3a67d98..3a3eab9e8e37b 100644
--- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
@@ -128,17 +128,15 @@ static mlir::LogicalResult convertOpToCall(OpTy op,
mlir::IntegerType::get(op.getContext(), 1)));
if (op.getSource()) {
mlir::Value stream =
- op.getStream()
- ? op.getStream()
- : builder.createIntegerConstant(loc, fTy.getInput(2), -1);
+ op.getStream() ? op.getStream()
+ : builder.createNullConstant(loc, fTy.getInput(2));
args = fir::runtime::createArguments(
builder, loc, fTy, op.getBox(), op.getSource(), stream, pinned,
hasStat, errmsg, sourceFile, sourceLine);
} else {
mlir::Value stream =
- op.getStream()
- ? op.getStream()
- : builder.createIntegerConstant(loc, fTy.getInput(1), -1);
+ op.getStream() ? op.getStream()
+ : builder.createNullConstant(loc, fTy.getInput(1));
args = fir::runtime::createArguments(builder, loc, fTy, op.getBox(),
stream, pinned, hasStat, errmsg,
sourceFile, sourceLine);
diff --git a/flang/test/Fir/CUDA/cuda-allocate.fir b/flang/test/Fir/CUDA/cuda-allocate.fir
index 095ad92d5deb5..ea7890c9aac52 100644
--- a/flang/test/Fir/CUDA/cuda-allocate.fir
+++ b/flang/test/Fir/CUDA/cuda-allocate.fir
@@ -19,7 +19,7 @@ func.func @_QPsub1() {
// CHECK: %[[DESC:.*]] = fir.convert %[[DESC_RT_CALL]] : (!fir.ref<!fir.box<none>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
// CHECK: %[[DECL_DESC:.*]]:2 = hlfir.declare %[[DESC]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
// CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DECL_DESC]]#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
-// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.ref<i64>, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
// CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DECL_DESC]]#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: %{{.*}} = fir.call @_FortranAAllocatableDeallocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
@@ -47,7 +47,7 @@ func.func @_QPsub3() {
// CHECK: %[[A:.*]]:2 = hlfir.declare %[[A_ADDR]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMmod1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
// CHECK: %[[A_BOX:.*]] = fir.convert %[[A]]#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
-// CHECK: fir.call @_FortranACUFAllocatableAllocateSync(%[[A_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+// CHECK: fir.call @_FortranACUFAllocatableAllocateSync(%[[A_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.ref<i64>, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
// CHECK: %[[A_BOX:.*]] = fir.convert %[[A]]#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: fir.call @_FortranACUFAllocatableDeallocate(%[[A_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
@@ -87,7 +87,7 @@ func.func @_QPsub5() {
}
// CHECK-LABEL: func.func @_QPsub5()
-// CHECK: fir.call @_FortranACUFAllocatableAllocate({{.*}}) : (!fir.ref<!fir.box<none>>, i64, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+// CHECK: fir.call @_FortranACUFAllocatableAllocate({{.*}}) : (!fir.ref<!fir.box<none>>, !fir.ref<i64>, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
// CHECK: fir.call @_FortranAAllocatableDeallocate({{.*}}) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
@@ -118,7 +118,7 @@ func.func @_QQsub6() attributes {fir.bindc_name = "test"} {
// CHECK: %[[B:.*]]:2 = hlfir.declare %[[B_ADDR]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMdataEb"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
// CHECK: _FortranAAllocatableSetBounds
// CHECK: %[[B_BOX:.*]] = fir.convert %[[B]]#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<none>>
-// CHECK: fir.call @_FortranACUFAllocatableAllocateSync(%[[B_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+// CHECK: fir.call @_FortranACUFAllocatableAllocateSync(%[[B_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.ref<i64>, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
func.func @_QPallocate_source() {
@@ -142,7 +142,7 @@ func.func @_QPallocate_source() {
// CHECK: %[[SOURCE:.*]] = fir.load %[[DECL_HOST]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>
// CHECK: %[[DEV_CONV:.*]] = fir.convert %[[DECL_DEV]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: %[[SOURCE_CONV:.*]] = fir.convert %[[SOURCE]] : (!fir.box<!fir.heap<!fir.array<?x?xf32>>>) -> !fir.box<none>
-// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocateSource(%[[DEV_CONV]], %[[SOURCE_CONV]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.box<none>, i64, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocateSource(%[[DEV_CONV]], %[[SOURCE_CONV]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.ref<i64>, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
fir.global @_QMmod1Ea_d {data_attr = #cuf.cuda<device>} : !fir.box<!fir.heap<!fir.array<?x?xf32>>> {
@@ -170,16 +170,14 @@ func.func @_QQallocate_stream() {
%1 = fir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
%2 = fir.alloca i64 {bindc_name = "stream1", uniq_name = "_QFEstream1"}
%3 = fir.declare %2 {uniq_name = "_QFEstream1"} : (!fir.ref<i64>) -> !fir.ref<i64>
- %4 = fir.load %3 : !fir.ref<i64>
- %5 = cuf.allocate %1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> stream(%4 : i64) {data_attr = #cuf.cuda<device>} -> i32
+ %5 = cuf.allocate %1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> stream(%3 : !fir.ref<i64>) {data_attr = #cuf.cuda<device>} -> i32
return
}
// CHECK-LABEL: func.func @_QQallocate_stream()
// CHECK: %[[STREAM_ALLOCA:.*]] = fir.alloca i64 {bindc_name = "stream1", uniq_name = "_QFEstream1"}
// CHECK: %[[STREAM:.*]] = fir.declare %[[STREAM_ALLOCA]] {uniq_name = "_QFEstream1"} : (!fir.ref<i64>) -> !fir.ref<i64>
-// CHECK: %[[STREAM_LOAD:.*]] = fir.load %[[STREAM]] : !fir.ref<i64>
-// CHECK: fir.call @_FortranACUFAllocatableAllocate(%{{.*}}, %[[STREAM_LOAD]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+// CHECK: fir.call @_FortranACUFAllocatableAllocate(%{{.*}}, %[[STREAM]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.ref<i64>, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
func.func @_QPp_alloc() {
@@ -268,6 +266,6 @@ func.func @_QQpinned() attributes {fir.bindc_name = "testasync"} {
// CHECK: %[[PINNED:.*]] = fir.alloca !fir.logical<4> {bindc_name = "pinnedflag", uniq_name = "_QFEpinnedflag"}
// CHECK: %[[DECL_PINNED:.*]] = fir.declare %[[PINNED]] {uniq_name = "_QFEpinnedflag"} : (!fir.ref<!fir.logical<4>>) -> !fir.ref<!fir.logical<4>>
// CHECK: %[[CONV_PINNED:.*]] = fir.convert %[[DECL_PINNED]] : (!fir.ref<!fir.logical<4>>) -> !fir.ref<i1>
-// CHECK: fir.call @_FortranACUFAllocatableAllocate(%{{.*}}, %{{.*}}, %[[CONV_PINNED]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+// CHECK: fir.call @_FortranACUFAllocatableAllocate(%{{.*}}, %{{.*}}, %[[CONV_PINNED]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.ref<i64>, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
} // end of module
diff --git a/flang/test/Fir/cuf-invalid.fir b/flang/test/Fir/cuf-invalid.fir
index a3b9be3ee8223..dceb8f6fde236 100644
--- a/flang/test/Fir/cuf-invalid.fir
+++ b/flang/test/Fir/cuf-invalid.fir
@@ -2,13 +2,12 @@
func.func @_QPsub1() {
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
- %1 = fir.alloca i32
+ %s = fir.alloca i64
%pinned = fir.alloca i1
%4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
- %s = fir.load %1 : !fir.ref<i32>
// expected-error at +1{{'cuf.allocate' op pinned and stream cannot appears at the same time}}
- %13 = cuf.allocate %11 : !fir.ref<!fir.box<none>> stream(%s : i32) pinned(%pinned : !fir.ref<i1>) {data_attr = #cuf.cuda<device>} -> i32
+ %13 = cuf.allocate %11 : !fir.ref<!fir.box<none>> stream(%s : !fir.ref<i64>) pinned(%pinned : !fir.ref<i1>) {data_attr = #cuf.cuda<device>} -> i32
return
}
diff --git a/flang/test/Fir/cuf.mlir b/flang/test/Fir/cuf.mlir
index d38b26a4548ed..f80a70eca34a3 100644
--- a/flang/test/Fir/cuf.mlir
+++ b/flang/test/Fir/cuf.mlir
@@ -18,15 +18,14 @@ func.func @_QPsub1() {
func.func @_QPsub1() {
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
- %1 = fir.alloca i32
+ %1 = fir.alloca i64
%4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
- %s = fir.load %1 : !fir.ref<i32>
- %13 = cuf.allocate %11 : !fir.ref<!fir.box<none>> stream(%s : i32) {data_attr = #cuf.cuda<device>} -> i32
+ %13 = cuf.allocate %11 : !fir.ref<!fir.box<none>> stream(%1 : !fir.ref<i64>) {data_attr = #cuf.cuda<device>} -> i32
return
}
-// CHECK: cuf.allocate %{{.*}} : !fir.ref<!fir.box<none>> stream(%{{.*}} : i32) {data_attr = #cuf.cuda<device>} -> i32
+// CHECK: cuf.allocate %{{.*}} : !fir.ref<!fir.box<none>> stream(%{{.*}} : !fir.ref<i64>) {data_attr = #cuf.cuda<device>} -> i32
// -----
diff --git a/flang/test/HLFIR/elemental-codegen.fir b/flang/test/HLFIR/elemental-codegen.fir
index a715479f16115..67af4261470f7 100644
--- a/flang/test/HLFIR/elemental-codegen.fir
+++ b/flang/test/HLFIR/elemental-codegen.fir
@@ -191,7 +191,7 @@ func.func @test_polymorphic(%arg0: !fir.class<!fir.type<_QMtypesTt>> {fir.bindc_
// CHECK: %[[VAL_35:.*]] = fir.absent !fir.box<none>
// CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_4]] : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_31]] : (!fir.ref<!fir.char<1,{{.*}}>>) -> !fir.ref<i8>
-// CHECK: %[[VAL_38:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_36]], %{{.*}}, %[[VAL_34]], %[[VAL_35]], %[[VAL_37]], %[[VAL_33]]) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+// CHECK: %[[VAL_38:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_36]], %{{.*}}, %[[VAL_34]], %[[VAL_35]], %[[VAL_37]], %[[VAL_33]]) : (!fir.ref<!fir.box<none>>, !fir.ref<i64>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
// CHECK: %[[VAL_12:.*]] = arith.constant true
// CHECK: %[[VAL_39:.*]] = fir.load %[[VAL_13]]#0 : !fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>
// CHECK: %[[VAL_40:.*]] = arith.constant 1 : index
@@ -275,7 +275,7 @@ func.func @test_polymorphic_expr(%arg0: !fir.class<!fir.type<_QMtypesTt>> {fir.b
// CHECK: %[[VAL_36:.*]] = fir.absent !fir.box<none>
// CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_5]] : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: %[[VAL_38:.*]] = fir.convert %[[VAL_32]] : (!fir.ref<!fir.char<1,{{.*}}>>) -> !fir.ref<i8>
-// CHECK: %[[VAL_39:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_37]], %{{.*}}, %[[VAL_35]], %[[VAL_36]], %[[VAL_38]], %[[VAL_34]]) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+// CHECK: %[[VAL_39:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_37]], %{{.*}}, %[[VAL_35]], %[[VAL_36]], %[[VAL_38]], %[[VAL_34]]) : (!fir.ref<!fir.box<none>>, !fir.ref<i64>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
// CHECK: %[[VAL_13:.*]] = arith.constant true
// CHECK: %[[VAL_40:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>
// CHECK: %[[VAL_41:.*]] = arith.constant 1 : index
@@ -328,7 +328,7 @@ func.func @test_polymorphic_expr(%arg0: !fir.class<!fir.type<_QMtypesTt>> {fir.b
// CHECK: %[[VAL_85:.*]] = fir.absent !fir.box<none>
// CHECK: %[[VAL_86:.*]] = fir.convert %[[VAL_4]] : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: %[[VAL_87:.*]] = fir.convert %[[VAL_81]] : (!fir.ref<!fir.char<1,{{.*}}>>) -> !fir.ref<i8>
-// CHECK: %[[VAL_88:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_86]], %{{.*}}, %[[VAL_84]], %[[VAL_85]], %[[VAL_87]], %[[VAL_83]]) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+// CHECK: %[[VAL_88:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_86]], %{{.*}}, %[[VAL_84]], %[[VAL_85]], %[[VAL_87]], %[[VAL_83]]) : (!fir.ref<!fir.box<none>>, !fir.ref<i64>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
// CHECK: %[[VAL_62:.*]] = arith.constant true
// CHECK: %[[VAL_89:.*]] = fir.load %[[VAL_63]]#0 : !fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>
// CHECK: %[[VAL_90:.*]] = arith.constant 1 : index
diff --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf
index a570f636b8db1..cec10dda839e9 100644
--- a/flang/test/Lower/CUDA/cuda-allocatable.cuf
+++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf
@@ -90,7 +90,7 @@ end subroutine
subroutine sub4()
real, allocatable, device :: a(:)
- integer :: istream
+ integer(8) :: istream
allocate(a(10), stream=istream)
end subroutine
@@ -98,11 +98,10 @@ end subroutine
! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub4Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub4Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
-! CHECK: %[[ISTREAM:.*]] = fir.alloca i32 {bindc_name = "istream", uniq_name = "_QFsub4Eistream"}
-! CHECK: %[[ISTREAM_DECL:.*]]:2 = hlfir.declare %[[ISTREAM]] {uniq_name = "_QFsub4Eistream"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[ISTREAM:.*]] = fir.alloca i64 {bindc_name = "istream", uniq_name = "_QFsub4Eistream"}
+! CHECK: %[[ISTREAM_DECL:.*]]:2 = hlfir.declare %[[ISTREAM]] {uniq_name = "_QFsub4Eistream"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
! CHECK: fir.call @_FortranAAllocatableSetBounds
-! CHECK: %[[STREAM:.*]] = fir.load %[[ISTREAM_DECL]]#0 : !fir.ref<i32>
-! CHECK: %{{.*}} = cuf.allocate %[[BOX_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> stream(%[[STREAM]] : i32) {data_attr = #cuf.cuda<device>} -> i32
+! CHECK: %{{.*}} = cuf.allocate %[[BOX_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> stream(%[[ISTREAM_DECL]]#0 : !fir.ref<i64>) {data_attr = #cuf.cuda<device>} -> i32
! CHECK: fir.if %{{.*}} {
! CHECK: %{{.*}} = cuf.deallocate %[[BOX_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
! CHECK: }
diff --git a/flang/test/Lower/OpenACC/acc-declare-unwrap-defaultbounds.f90 b/flang/test/Lower/OpenACC/acc-declare-unwrap-defaultbounds.f90
index 5bb1ae3797346..6869af863644d 100644
--- a/flang/test/Lower/OpenACC/acc-declare-unwrap-defaultbounds.f90
+++ b/flang/test/Lower/OpenACC/acc-declare-unwrap-defaultbounds.f90
@@ -473,6 +473,6 @@ subroutine init()
end module
! CHECK-LABEL: func.func @_QMacc_declare_post_action_statPinit()
-! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath<contract> {acc.declare_action = #acc.declare_action<postAlloc = @_QMacc_declare_post_action_statEx_acc_declare_update_desc_post_alloc>} : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath<contract> {acc.declare_action = #acc.declare_action<postAlloc = @_QMacc_declare_post_action_statEx_acc_declare_update_desc_post_alloc>} : (!fir.ref<!fir.box<none>>, !fir.ref<i64>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
! CHECK: fir.if
-! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath<contract> {acc.declare_action = #acc.declare_action<postAlloc = @_QMacc_declare_post_action_statEy_acc_declare_update_desc_post_alloc>} : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath<contract> {acc.declare_action = #acc.declare_action<postAlloc = @_QMacc_declare_post_action_statEy_acc_declare_update_desc_post_alloc>} : (!fir.ref<!fir.box<none>>, !fir.ref<i64>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
diff --git a/flang/test/Lower/OpenACC/acc-declare.f90 b/flang/test/Lower/OpenACC/acc-declare.f90
index 889cdef51f4ce..4d95ffa10edaf 100644
--- a/flang/test/Lower/OpenACC/acc-declare.f90
+++ b/flang/test/Lower/OpenACC/acc-declare.f90
@@ -434,6 +434,6 @@ subroutine init()
end module
! CHECK-LABEL: func.func @_QMacc_declare_post_action_statPinit()
-! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath<contract> {acc.declare_action = #acc.declare_action<postAlloc = @_QMacc_declare_post_action_statEx_acc_declare_update_desc_post_alloc>} : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath<contract> {acc.declare_action = #acc.declare_action<postAlloc = @_QMacc_declare_post_action_statEx_acc_declare_update_desc_post_alloc>} : (!fir.ref<!fir.box<none>>, !fir.ref<i64>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
! CHECK: fir.if
-! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath<contract> {acc.declare_action = #acc.declare_action<postAlloc = @_QMacc_declare_post_action_statEy_acc_declare_update_desc_post_alloc>} : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath<contract> {acc.declare_action = #acc.declare_action<postAlloc = @_QMacc_declare_post_action_statEy_acc_declare_update_desc_post_alloc>} : (!fir.ref<!fir.box<none>>, !fir.ref<i64>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
diff --git a/flang/test/Lower/allocatable-polymorphic.f90 b/flang/test/Lower/allocatable-polymorphic.f90
index dd8671daeaf8e..cbd7876203424 100644
--- a/flang/test/Lower/allocatable-polymorphic.f90
+++ b/flang/test/Lower/allocatable-polymorphic.f90
@@ -267,7 +267,7 @@ subroutine test_allocatable()
! CHECK: %[[C0:.*]] = arith.constant 0 : i32
! CHECK: fir.call @_FortranAAllocatableInitDerivedForAllocate(%[[P_CAST]], %[[TYPE_DESC_P1_CAST]], %[[RANK]], %[[C0]]) {{.*}}: (!fir.ref<!fir.box<none>>, !fir.ref<none>, i32, i32) -> ()
! CHECK: %[[P_CAST:.*]] = fir.convert %[[P_DECL]]#0 : (!fir.ref<!fir.class<!fir.heap<!fir.type<_QMpolyTp1{a:i32,b:i32}>>>>) -> !fir.ref<!fir.box<none>>
-! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[P_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[P_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, !fir.ref<i64>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
! CHECK: %[[TYPE_DESC_P1:.*]] = fir.type_desc !fir.type<_QMpolyTp1{a:i32,b:i32}>
! CHECK: %[[C1_CAST:.*]] = fir.convert %[[C1_DECL]]#0 : (!fir.ref<!fir.class<!fir.heap<!fir.type<_QMpolyTp1{a:i32,b:i32}>>>>) -> !fir.ref<!fir.box<none>>
@@ -276,7 +276,7 @@ subroutine test_allocatable()
! CHECK: %[[C0:.*]] = arith.constant 0 : i32
! CHECK: fir.call @_FortranAAllocatableInitDerivedForAllocate(%[[C1_CAST]], %[[TYPE_DESC_P1_CAST]], %[[RANK]], %[[C0]]) {{.*}}: (!fir.ref<!fir.box<none>>, !fir.ref<none>, i32, i32) -> ()
! CHECK: %[[C1_CAST:.*]] = fir.convert %[[C1_DECL]]#0 : (!fir.ref<!fir.class<!fir.heap<!fir.type<_QMpolyTp1{a:i32,b:i32}>>>>) -> !fir.ref<!fir.box<none>>
-! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C1_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C1_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, !fir.ref<i64>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
! CHECK: %[[TYPE_DESC_P2:.*]] = fir.type_desc !fir.type<_QMpolyTp2{p1:!fir.type<_QMpolyTp1{a:i32,b:i32}>,c:i32}>
! CHECK: %[[C2_CAST:.*]] = fir.convert %[[C2_DECL]]#0 : (!fir.ref<!fir.class<!fir.heap<!fir.type<_QMpolyTp1{a:i32,b:i32}>>>>) -> !fir.ref<!fir.box<none>>
@@ -285,7 +285,7 @@ subroutine test_allocatable()
! CHECK: %[[C0:.*]] = arith.constant 0 : i32
! CHECK: fir.call @_FortranAAllocatableInitDerivedForAllocate(%[[C2_CAST]], %[[TYPE_DESC_P2_CAST]], %[[RANK]], %[[C0]]) {{.*}}: (!fir.ref<!fir.box<none>>, !fir.ref<none>, i32, i32) -> ()
! CHECK: %[[C2_CAST:.*]] = fir.convert %[[C2_DECL]]#0 : (!fir.ref<!fir.class<!fir.heap<!fir.type<_QMpolyTp1{a:i32,b:i32}>>>>) -> !fir.ref<!fir.box<none>>
-! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C2_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C2_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, !fir.ref<i64>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
! CHECK: %[[TYPE_DESC_P1:.*]] = fir.type_desc !fir.type<_QMpolyTp1{a:i32,b:i32}>
! CHECK: %[[C3_CAST:.*]] = fir.convert %[[C3_DECL]]#0 : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMpolyTp1{a:i32,b:i32}>>>>>) -> !fir.ref<!fir.box<none>>
@@ -300,7 +300,7 @@ subroutine test_allocatable()
! CHECK: %[[C10_I64:.*]] = fir.convert %[[C10]] : (i32) -> i64
! CHECK: fir.call @_FortranAAllocatableSetBounds(%[[C3_CAST]], %[[C0]], %[[C1_I64]], %[[C10_I64]]) {{.*}}: (!fir.ref<!fir.box<none>>, i32, i64, i64) -> ()
! CHECK: %[[C3_CAST:.*]] = fir.convert %[[C3_DECL]]#0 : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMpolyTp1{a:i32,b:i32}>>>>>) -> !fir.ref<!fir.box<none>>
-! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C3_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C3_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, !fir.ref<i64>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
! CHECK: %[[TYPE_DESC_P2:.*]] = fir.type_desc !fir.type<_QMpolyTp2{p1:!fir.type<_QMpolyTp1{a:i32,b:i32}>,c:i32}>
! CHECK: %[[C4_CAST:.*]] = fir.convert %[[C4_DECL]]#0 : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMpolyTp1{a:i32,b:i32}>>>>>) -> !fir.ref<!fir.box<none>>
@@ -316,7 +316,7 @@ subroutine test_allocatable()
! CHECK: %[[C20_I64:.*]] = fir.convert %[[C20]] : (i32) -> i64
! CHECK: fir.call @_FortranAAllocatableSetBounds(%[[C4_CAST]], %[[C0]], %[[C1_I64]], %[[C20_I64]]) {{.*}}: (!fir.ref<!fir.box<none>>, i32, i64, i64) -> ()
! CHECK: %[[C4_CAST:.*]] = fir.convert %[[C4_DECL]]#0 : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMpolyTp1{a:i32,b:i32}>>>>>) -> !fir.ref<!fir.box<none>>
-! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C4_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C4_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, !fir.ref<i64>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
! CHECK: %[[C1_LOAD1:.*]] = fir.load %[[C1_DECL]]#0 : !fir.ref<!fir.class<!fir.heap<!fir.type<_QMpolyTp1{a:i32,b:i32}>>>>
! CHECK: fir.dispatch "proc1"(%[[C1_LOAD1]] : !fir.class<!fir.heap<!fir.type<_QMpolyTp1{a:i32,b:i32}>>>)
@@ -390,7 +390,7 @@ subroutine test_unlimited_polymorphic_with_intrinsic_type_spec()
! CHECK: %[[CORANK:.*]] = arith.constant 0 : i32
! CHECK: fir.call @_FortranAAllocatableInitIntrinsicForAllocate(%[[BOX_NONE]], %[[CAT]], %[[KIND]], %[[RANK]], %[[CORANK]]) {{.*}} : (!fir.ref<!fir.box<none>>, i32, i32, i32, i32) -> ()
! CHECK: %[[BOX_NONE:.*]] = fir.convert %[[P_DECL]]#0 : (!fir.ref<!fir.class<!fir.heap<none>>>) -> !fir.ref<!fir.box<none>>
-! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, !fir.ref<i64>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
! CHECK: %[[BOX_NONE:.*]] = fir.convert %[[PTR_DECL]]#0 : (!fir.ref<!fir.class<!fir.ptr<none>>>) -> !fir.ref<!fir.box<none>>
! CHECK: %[[CAT:.*]] = arith.constant 2 : i32
@@ -573,7 +573,7 @@ subroutine test_allocatable_up_character()
! CHECK: %[[CORANK:.*]] = arith.constant 0 : i32
! CHECK: fir.call @_FortranAAllocatableInitCharacterForAllocate(%[[A_NONE]], %[[LEN]], %[[KIND]], %[[RANK]], %[[CORANK]]) {{.*}} : (!fir.ref<!fir.box<none>>, i64, i32, i32, i32) -> ()
! CHECK: %[[A_NONE:.*]] = fir.convert %[[A_DECL]]#0 : (!fir.ref<!fir.class<!fir.heap<none>>>) -> !fir.ref<!fir.box<none>>
-! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[A_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[A_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, !fir.ref<i64>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
end module
@@ -592,17 +592,17 @@ program test_alloc
! LLVM-LABEL: define void @_QMpolyPtest_allocatable()
! LLVM: call void @_FortranAAllocatableInitDerivedForAllocate(ptr %{{.*}}, ptr @_QMpolyEXdtXp1, i32 0, i32 0)
-! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i64 {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
+! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, ptr {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
! LLVM: call void @_FortranAAllocatableInitDerivedForAllocate(ptr %{{.*}}, ptr @_QMpolyEXdtXp1, i32 0, i32 0)
-! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i64 {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
+! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, ptr {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
! LLVM: call void @_FortranAAllocatableInitDerivedForAllocate(ptr %{{.*}}, ptr @_QMpolyEXdtXp2, i32 0, i32 0)
-! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i64 {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
+! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, ptr {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
! LLVM: call void @_FortranAAllocatableInitDerivedForAllocate(ptr %{{.*}}, ptr @_QMpolyEXdtXp1, i32 1, i32 0)
! LLVM: call void @_FortranAAllocatableSetBounds(ptr %{{.*}}, i32 0, i64 1, i64 10)
-! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i64 {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
+! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, ptr {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
! LLVM: call void @_FortranAAllocatableInitDerivedForAllocate(ptr %{{.*}}, ptr @_QMpolyEXdtXp2, i32 1, i32 0)
! LLVM: call void @_FortranAAllocatableSetBounds(ptr %{{.*}}, i32 0, i64 1, i64 20)
-! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i64 {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
+! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, ptr {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
! LLVM-COUNT-2: call void %{{[0-9]*}}()
! LLVM: call void @llvm.memcpy.p0.p0.i32
@@ -683,5 +683,5 @@ program test_alloc
! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } { ptr null, i64 8, i32 20240719, i8 0, i8 42, i8 2, i8 1, ptr @_QMpolyEXdtXp1, [1 x i64] zeroinitializer }, ptr %[[ALLOCA1:[0-9]*]]
! LLVM: call void @llvm.memcpy.p0.p0.i32(ptr %[[ALLOCA2:[0-9]+]], ptr %[[ALLOCA1]], i32 40, i1 false)
! LLVM: call void @_FortranAAllocatableInitDerivedForAllocate(ptr %[[ALLOCA2]], ptr @_QMpolyEXdtXp1, i32 0, i32 0)
-! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %[[ALLOCA2]], i64 {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
+! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %[[ALLOCA2]], ptr {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
! LLVM: %{{.*}} = call i32 @_FortranAAllocatableDeallocatePolymorphic(ptr %[[ALLOCA2]], ptr {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
diff --git a/flang/test/Lower/allocatable-runtime.f90 b/flang/test/Lower/allocatable-runtime.f90
index 37272c90656cc..c63252c68974e 100644
--- a/flang/test/Lower/allocatable-runtime.f90
+++ b/flang/test/Lower/allocatable-runtime.f90
@@ -31,7 +31,7 @@ subroutine foo()
! CHECK: fir.call @{{.*}}AllocatableSetBounds(%[[xBoxCast2]], %c0{{.*}}, %[[xlbCast]], %[[xubCast]]) {{.*}}: (!fir.ref<!fir.box<none>>, i32, i64, i64) -> ()
! CHECK-DAG: %[[xBoxCast3:.*]] = fir.convert %[[xBoxAddr]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
! CHECK-DAG: %[[sourceFile:.*]] = fir.convert %{{.*}} -> !fir.ref<i8>
- ! CHECK: fir.call @{{.*}}AllocatableAllocate(%[[xBoxCast3]], %{{.*}}, %false{{.*}}, %[[errMsg]], %[[sourceFile]], %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+ ! CHECK: fir.call @{{.*}}AllocatableAllocate(%[[xBoxCast3]], %{{.*}}, %false{{.*}}, %[[errMsg]], %[[sourceFile]], %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, !fir.ref<i64>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
! Simply check that we are emitting the right numebr of set bound for y and z. Otherwise, this is just like x.
! CHECK: fir.convert %[[yBoxAddr]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>) -> !fir.ref<!fir.box<none>>
@@ -180,4 +180,4 @@ subroutine mold_allocation()
! CHECK: %[[M_BOX_NONE:.*]] = fir.convert %[[EMBOX_M]] : (!fir.box<!fir.array<10xi32>>) -> !fir.box<none>
! CHECK: fir.call @_FortranAAllocatableApplyMold(%[[A_BOX_NONE]], %[[M_BOX_NONE]], %[[RANK]]) {{.*}} : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32) -> ()
! CHECK: %[[A_BOX_NONE:.*]] = fir.convert %[[A]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<none>>
-! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[A_BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[A_BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, !fir.ref<i64>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
diff --git a/flang/test/Lower/allocate-mold.f90 b/flang/test/Lower/allocate-mold.f90
index c7985b11397ce..9427c8b08786f 100644
--- a/flang/test/Lower/allocate-mold.f90
+++ b/flang/test/Lower/allocate-mold.f90
@@ -16,7 +16,7 @@ subroutine scalar_mold_allocation()
! CHECK: %[[A_REF_BOX_NONE1:.*]] = fir.convert %[[A]] : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> !fir.ref<!fir.box<none>>
! CHECK: fir.call @_FortranAAllocatableApplyMold(%[[A_REF_BOX_NONE1]], %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32) -> ()
! CHECK: %[[A_REF_BOX_NONE2:.*]] = fir.convert %[[A]] : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> !fir.ref<!fir.box<none>>
-! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[A_REF_BOX_NONE2]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[A_REF_BOX_NONE2]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, !fir.ref<i64>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
subroutine array_scalar_mold_allocation()
real, allocatable :: a(:)
@@ -40,4 +40,4 @@ end subroutine array_scalar_mold_allocation
! CHECK: %[[REF_BOX_A1:.*]] = fir.convert %1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
! CHECK: fir.call @_FortranAAllocatableSetBounds(%[[REF_BOX_A1]], {{.*}},{{.*}}, {{.*}}) fastmath<contract> : (!fir.ref<!fir.box<none>>, i32, i64, i64) -> ()
! CHECK: %[[REF_BOX_A2:.*]] = fir.convert %[[A]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
-! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[REF_BOX_A2]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[REF_BOX_A2]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, !fir.ref<i64>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
diff --git a/flang/test/Lower/polymorphic.f90 b/flang/test/Lower/polymorphic.f90
index 485861a838ff6..b7be5f685d9e3 100644
--- a/flang/test/Lower/polymorphic.f90
+++ b/flang/test/Lower/polymorphic.f90
@@ -1149,7 +1149,7 @@ program test
! CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "test"} {
! CHECK: %[[ADDR_O:.*]] = fir.address_of(@_QFEo) : !fir.ref<!fir.box<!fir.heap<!fir.type<_QMpolymorphic_testTouter{inner:!fir.type<_QMpolymorphic_testTp1{a:i32,b:i32}>}>>>>
! CHECK: %[[BOX_NONE:.*]] = fir.convert %[[ADDR_O]] : (!fir.ref<!fir.box<!fir.heap<!fir.type<_QMpolymorphic_testTouter{inner:!fir.type<_QMpolymorphic_testTp1{a:i32,b:i32}>}>>>>) -> !fir.ref<!fir.box<none>>
-! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, !fir.ref<i64>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
! CHECK: %[[O:.*]] = fir.load %[[ADDR_O]] : !fir.ref<!fir.box<!fir.heap<!fir.type<_QMpolymorphic_testTouter{inner:!fir.type<_QMpolymorphic_testTp1{a:i32,b:i32}>}>>>>
! CHECK: %[[COORD_INNER:.*]] = fir.coordinate_of %[[O]], inner : (!fir.box<!fir.heap<!fir.type<_QMpolymorphic_testTouter{inner:!fir.type<_QMpolymorphic_testTp1{a:i32,b:i32}>}>>>) -> !fir.ref<!fir.type<_QMpolymorphic_testTp1{a:i32,b:i32}>>
! CHECK: %{{.*}} = fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered iter_args(%arg1 = %{{.*}}) -> (!fir.array<5x!fir.logical<4>>) {
diff --git a/flang/test/Transforms/lower-repack-arrays.fir b/flang/test/Transforms/lower-repack-arrays.fir
index bbae7ba5b0e0b..0b323b1bb0697 100644
--- a/flang/test/Transforms/lower-repack-arrays.fir
+++ b/flang/test/Transforms/lower-repack-arrays.fir
@@ -840,7 +840,7 @@ func.func @_QPtest6(%arg0: !fir.class<!fir.array<?x?x!fir.type<_QMmTt>>> {fir.bi
// CHECK: %[[VAL_34:.*]] = fir.absent !fir.box<none>
// CHECK: %[[VAL_35:.*]] = fir.convert %[[VAL_7]] : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMmTt>>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_33]] : (!fir.ref<!fir.char<1,{{.*}}>>) -> !fir.ref<i8>
-// CHECK: %[[VAL_37:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_35]], %{{.*}}, %[[VAL_6]], %[[VAL_34]], %[[VAL_36]], %[[VAL_2]]) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+// CHECK: %[[VAL_37:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_35]], %{{.*}}, %[[VAL_6]], %[[VAL_34]], %[[VAL_36]], %[[VAL_2]]) : (!fir.ref<!fir.box<none>>, !fir.ref<i64>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
// CHECK: %[[VAL_38:.*]] = fir.load %[[VAL_22]] : !fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMmTt>>>>>
// CHECK: %[[VAL_39:.*]] = fir.address_of(@{{_QQcl.*}}
// CHECK: %[[VAL_40:.*]] = fir.convert %[[VAL_38]] : (!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMmTt>>>>) -> !fir.box<none>
@@ -928,7 +928,7 @@ func.func @_QPtest6_stack(%arg0: !fir.class<!fir.array<?x?x!fir.type<_QMmTt>>> {
// CHECK: %[[VAL_34:.*]] = fir.absent !fir.box<none>
// CHECK: %[[VAL_35:.*]] = fir.convert %[[VAL_7]] : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMmTt>>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_33]] : (!fir.ref<!fir.char<1,{{.*}}>>) -> !fir.ref<i8>
-// CHECK: %[[VAL_37:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_35]], %{{.*}}, %[[VAL_6]], %[[VAL_34]], %[[VAL_36]], %[[VAL_2]]) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+// CHECK: %[[VAL_37:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_35]], %{{.*}}, %[[VAL_6]], %[[VAL_34]], %[[VAL_36]], %[[VAL_2]]) : (!fir.ref<!fir.box<none>>, !fir.ref<i64>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
// CHECK: %[[VAL_38:.*]] = fir.load %[[VAL_22]] : !fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMmTt>>>>>
// CHECK: %[[VAL_39:.*]] = fir.address_of(@{{_QQcl.*}}
// CHECK: %[[VAL_40:.*]] = fir.convert %[[VAL_38]] : (!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMmTt>>>>) -> !fir.box<none>
@@ -1015,7 +1015,7 @@ func.func @_QPtest7(%arg0: !fir.class<!fir.array<?x?xnone>> {fir.bindc_name = "x
// CHECK: %[[VAL_34:.*]] = fir.absent !fir.box<none>
// CHECK: %[[VAL_35:.*]] = fir.convert %[[VAL_7]] : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?xnone>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_33]] : (!fir.ref<!fir.char<1,{{.*}}>>) -> !fir.ref<i8>
-// CHECK: %[[VAL_37:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_35]], %{{.*}}, %[[VAL_6]], %[[VAL_34]], %[[VAL_36]], %[[VAL_2]]) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+// CHECK: %[[VAL_37:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_35]], %{{.*}}, %[[VAL_6]], %[[VAL_34]], %[[VAL_36]], %[[VAL_2]]) : (!fir.ref<!fir.box<none>>, !fir.ref<i64>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
// CHECK: %[[VAL_38:.*]] = fir.load %[[VAL_22]] : !fir.ref<!fir.class<!fir.heap<!fir.array<?x?xnone>>>>
// CHECK: %[[VAL_39:.*]] = fir.address_of(@{{_QQcl.*}}
// CHECK: %[[VAL_40:.*]] = fir.convert %[[VAL_38]] : (!fir.class<!fir.heap<!fir.array<?x?xnone>>>) -> !fir.box<none>
@@ -1103,7 +1103,7 @@ func.func @_QPtest7_stack(%arg0: !fir.class<!fir.array<?x?xnone>> {fir.bindc_nam
// CHECK: %[[VAL_34:.*]] = fir.absent !fir.box<none>
// CHECK: %[[VAL_35:.*]] = fir.convert %[[VAL_7]] : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?xnone>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_33]] : (!fir.ref<!fir.char<1,{{.*}}>>) -> !fir.ref<i8>
-// CHECK: %[[VAL_37:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_35]], %{{.*}}, %[[VAL_6]], %[[VAL_34]], %[[VAL_36]], %[[VAL_2]]) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+// CHECK: %[[VAL_37:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_35]], %{{.*}}, %[[VAL_6]], %[[VAL_34]], %[[VAL_36]], %[[VAL_2]]) : (!fir.ref<!fir.box<none>>, !fir.ref<i64>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
// CHECK: %[[VAL_38:.*]] = fir.load %[[VAL_22]] : !fir.ref<!fir.class<!fir.heap<!fir.array<?x?xnone>>>>
// CHECK: %[[VAL_39:.*]] = fir.address_of(@{{_QQcl.*}}
// CHECK: %[[VAL_40:.*]] = fir.convert %[[VAL_38]] : (!fir.class<!fir.heap<!fir.array<?x?xnone>>>) -> !fir.box<none>
More information about the flang-commits
mailing list