[flang] [llvm] [flang][cuda] Pass allocator index to allocate functions (PR #157189)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 5 15:12:20 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-flang-fir-hlfir
Author: Valentin Clement (バレンタイン クレメン) (clementval)
<details>
<summary>Changes</summary>
Pass the allocator index as an argument to the CUF allocate runtime functions. This information is already part of cuf.allocate, and it is useful for device-resident components.
---
Patch is 26.34 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/157189.diff
12 Files Affected:
- (modified) flang-rt/lib/cuda/allocatable.cpp (+14-11)
- (modified) flang-rt/lib/cuda/pointer.cpp (+19-14)
- (modified) flang-rt/lib/runtime/CMakeLists.txt (+1-1)
- (modified) flang/include/flang/Lower/CUDA.h (+1-15)
- (modified) flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h (+2)
- (modified) flang/include/flang/Runtime/CUDA/allocatable.h (+12-10)
- (modified) flang/include/flang/Runtime/CUDA/pointer.h (+12-10)
- (modified) flang/lib/Lower/CUDA.cpp (+17)
- (modified) flang/lib/Lower/ConvertVariable.cpp (+3-15)
- (modified) flang/lib/Optimizer/Dialect/CUF/Attributes/CUFAttr.cpp (+13)
- (modified) flang/lib/Optimizer/Transforms/CUFOpConversion.cpp (+7-5)
- (modified) flang/test/Fir/CUDA/cuda-allocate.fir (+7-7)
``````````diff
diff --git a/flang-rt/lib/cuda/allocatable.cpp b/flang-rt/lib/cuda/allocatable.cpp
index ff1a225d66ce9..483b54061036d 100644
--- a/flang-rt/lib/cuda/allocatable.cpp
+++ b/flang-rt/lib/cuda/allocatable.cpp
@@ -23,11 +23,11 @@ namespace Fortran::runtime::cuda {
extern "C" {
RT_EXT_API_GROUP_BEGIN
-int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int64_t *stream,
- bool *pinned, bool hasStat, const Descriptor *errMsg,
+int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int32_t allocIdx,
+ int64_t *stream, bool *pinned, bool hasStat, const Descriptor *errMsg,
const char *sourceFile, int sourceLine) {
int stat{RTNAME(CUFAllocatableAllocate)(
- desc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
+ desc, allocIdx, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
#ifndef RT_DEVICE_COMPILATION
// Descriptor synchronization is only done when the allocation is done
// from the host.
@@ -41,9 +41,12 @@ int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int64_t *stream,
return stat;
}
-int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t *stream,
- bool *pinned, bool hasStat, const Descriptor *errMsg,
+int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int32_t allocIdx,
+ int64_t *stream, bool *pinned, bool hasStat, const Descriptor *errMsg,
const char *sourceFile, int sourceLine) {
+#if !defined(RT_DEVICE_COMPILATION)
+ desc.SetAllocIdx(allocIdx);
+#endif
// Perform the standard allocation.
int stat{RTNAME(AllocatableAllocate)(
desc, stream, hasStat, errMsg, sourceFile, sourceLine)};
@@ -56,10 +59,10 @@ int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t *stream,
}
int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc,
- const Descriptor &source, int64_t *stream, bool *pinned, bool hasStat,
+ const Descriptor &source, int32_t allocIdx, int64_t *stream, bool *pinned, bool hasStat,
const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
- int stat{RTNAME(CUFAllocatableAllocate)(
- alloc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
+ int stat{RTNAME(CUFAllocatableAllocate)(alloc, allocIdx, stream, pinned,
+ hasStat, errMsg, sourceFile, sourceLine)};
if (stat == StatOk) {
Terminator terminator{sourceFile, sourceLine};
Fortran::runtime::DoFromSourceAssign(
@@ -69,10 +72,10 @@ int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc,
}
int RTDEF(CUFAllocatableAllocateSourceSync)(Descriptor &alloc,
- const Descriptor &source, int64_t *stream, bool *pinned, bool hasStat,
+ const Descriptor &source, int32_t allocIdx, int64_t *stream, bool *pinned, bool hasStat,
const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
- int stat{RTNAME(CUFAllocatableAllocateSync)(
- alloc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
+ int stat{RTNAME(CUFAllocatableAllocateSync)(alloc, allocIdx, stream, pinned,
+ hasStat, errMsg, sourceFile, sourceLine)};
if (stat == StatOk) {
Terminator terminator{sourceFile, sourceLine};
Fortran::runtime::DoFromSourceAssign(
diff --git a/flang-rt/lib/cuda/pointer.cpp b/flang-rt/lib/cuda/pointer.cpp
index d3f5cfe8e96a1..3e450596e0f12 100644
--- a/flang-rt/lib/cuda/pointer.cpp
+++ b/flang-rt/lib/cuda/pointer.cpp
@@ -22,9 +22,12 @@ namespace Fortran::runtime::cuda {
extern "C" {
RT_EXT_API_GROUP_BEGIN
-int RTDEF(CUFPointerAllocate)(Descriptor &desc, int64_t *stream, bool *pinned,
- bool hasStat, const Descriptor *errMsg, const char *sourceFile,
- int sourceLine) {
+int RTDEF(CUFPointerAllocate)(Descriptor &desc, int32_t allocIdx,
+ int64_t *stream, bool *pinned, bool hasStat, const Descriptor *errMsg,
+ const char *sourceFile, int sourceLine) {
+#if !defined(RT_DEVICE_COMPILATION)
+ desc.SetAllocIdx(allocIdx);
+#endif
// Perform the standard allocation.
int stat{
RTNAME(PointerAllocate)(desc, hasStat, errMsg, sourceFile, sourceLine)};
@@ -36,11 +39,11 @@ int RTDEF(CUFPointerAllocate)(Descriptor &desc, int64_t *stream, bool *pinned,
return stat;
}
-int RTDEF(CUFPointerAllocateSync)(Descriptor &desc, int64_t *stream,
- bool *pinned, bool hasStat, const Descriptor *errMsg,
+int RTDEF(CUFPointerAllocateSync)(Descriptor &desc, int32_t allocIdx,
+ int64_t *stream, bool *pinned, bool hasStat, const Descriptor *errMsg,
const char *sourceFile, int sourceLine) {
int stat{RTNAME(CUFPointerAllocate)(
- desc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
+ desc, allocIdx, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
#ifndef RT_DEVICE_COMPILATION
// Descriptor synchronization is only done when the allocation is done
// from the host.
@@ -55,10 +58,11 @@ int RTDEF(CUFPointerAllocateSync)(Descriptor &desc, int64_t *stream,
}
int RTDEF(CUFPointerAllocateSource)(Descriptor &pointer,
- const Descriptor &source, int64_t *stream, bool *pinned, bool hasStat,
- const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
- int stat{RTNAME(CUFPointerAllocate)(
- pointer, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
+ const Descriptor &source, int32_t allocIdx, int64_t *stream, bool *pinned,
+ bool hasStat, const Descriptor *errMsg, const char *sourceFile,
+ int sourceLine) {
+ int stat{RTNAME(CUFPointerAllocate)(pointer, allocIdx, stream, pinned,
+ hasStat, errMsg, sourceFile, sourceLine)};
if (stat == StatOk) {
Terminator terminator{sourceFile, sourceLine};
Fortran::runtime::DoFromSourceAssign(
@@ -68,10 +72,11 @@ int RTDEF(CUFPointerAllocateSource)(Descriptor &pointer,
}
int RTDEF(CUFPointerAllocateSourceSync)(Descriptor &pointer,
- const Descriptor &source, int64_t *stream, bool *pinned, bool hasStat,
- const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
- int stat{RTNAME(CUFPointerAllocateSync)(
- pointer, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
+ const Descriptor &source, int32_t allocIdx, int64_t *stream, bool *pinned,
+ bool hasStat, const Descriptor *errMsg, const char *sourceFile,
+ int sourceLine) {
+ int stat{RTNAME(CUFPointerAllocateSync)(pointer, allocIdx, stream, pinned,
+ hasStat, errMsg, sourceFile, sourceLine)};
if (stat == StatOk) {
Terminator terminator{sourceFile, sourceLine};
Fortran::runtime::DoFromSourceAssign(
diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt
index 6548ec955b2b8..bd4eca52d6e29 100644
--- a/flang-rt/lib/runtime/CMakeLists.txt
+++ b/flang-rt/lib/runtime/CMakeLists.txt
@@ -180,7 +180,7 @@ if ("${LLVM_RUNTIMES_TARGET}" MATCHES "^amdgcn|^nvptx")
elseif(FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT STREQUAL "CUDA")
# findloc.cpp has some issues with higher compute capability. Remove it
# from CUDA build until we can lower its memory footprint.
- list(REMOVE_ITEM supported_sources findloc.cpp)
+
set(sources ${supported_sources})
else ()
set(sources ${supported_sources} ${host_sources} ${f128_sources})
diff --git a/flang/include/flang/Lower/CUDA.h b/flang/include/flang/Lower/CUDA.h
index 4a831fd502af4..0a085f47327f2 100644
--- a/flang/include/flang/Lower/CUDA.h
+++ b/flang/include/flang/Lower/CUDA.h
@@ -31,21 +31,7 @@ namespace Fortran::lower {
class AbstractConverter;
-static inline unsigned getAllocatorIdx(const Fortran::semantics::Symbol &sym) {
- std::optional<Fortran::common::CUDADataAttr> cudaAttr =
- Fortran::semantics::GetCUDADataAttr(&sym.GetUltimate());
- if (cudaAttr) {
- if (*cudaAttr == Fortran::common::CUDADataAttr::Pinned)
- return kPinnedAllocatorPos;
- if (*cudaAttr == Fortran::common::CUDADataAttr::Device)
- return kDeviceAllocatorPos;
- if (*cudaAttr == Fortran::common::CUDADataAttr::Managed)
- return kManagedAllocatorPos;
- if (*cudaAttr == Fortran::common::CUDADataAttr::Unified)
- return kUnifiedAllocatorPos;
- }
- return kDefaultAllocator;
-}
+unsigned getAllocatorIdx(const Fortran::semantics::Symbol &sym);
void initializeDeviceComponentAllocator(
Fortran::lower::AbstractConverter &converter,
diff --git a/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h b/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h
index 4a250d1cc6c54..c00f9e718ad18 100644
--- a/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h
+++ b/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h
@@ -112,6 +112,8 @@ cuf::DataAttributeAttr getDataAttr(mlir::Operation *op);
/// Returns true if the operation has a data attribute with the given value.
bool hasDataAttr(mlir::Operation *op, cuf::DataAttribute value);
+unsigned getAllocatorIdx(cuf::DataAttribute dataAttr);
+
} // namespace cuf
#endif // FORTRAN_OPTIMIZER_DIALECT_CUF_CUFATTR_H
diff --git a/flang/include/flang/Runtime/CUDA/allocatable.h b/flang/include/flang/Runtime/CUDA/allocatable.h
index 6c97afa9e10e8..43b45cff9a1f5 100644
--- a/flang/include/flang/Runtime/CUDA/allocatable.h
+++ b/flang/include/flang/Runtime/CUDA/allocatable.h
@@ -17,31 +17,33 @@ namespace Fortran::runtime::cuda {
extern "C" {
/// Perform allocation of the descriptor.
-int RTDECL(CUFAllocatableAllocate)(Descriptor &, int64_t *stream = nullptr,
- bool *pinned = nullptr, bool hasStat = false,
+int RTDECL(CUFAllocatableAllocate)(Descriptor &, int32_t allocIdx,
+ int64_t *stream = nullptr, bool *pinned = nullptr, bool hasStat = false,
const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
int sourceLine = 0);
/// Perform allocation of the descriptor with synchronization of it when
/// necessary.
-int RTDECL(CUFAllocatableAllocateSync)(Descriptor &, int64_t *stream = nullptr,
- bool *pinned = nullptr, bool hasStat = false,
+int RTDECL(CUFAllocatableAllocateSync)(Descriptor &, int32_t allocIdx,
+ int64_t *stream = nullptr, bool *pinned = nullptr, bool hasStat = false,
const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
int sourceLine = 0);
/// Perform allocation of the descriptor without synchronization. Assign data
/// from source.
int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc,
- const Descriptor &source, int64_t *stream = nullptr, bool *pinned = nullptr,
- bool hasStat = false, const Descriptor *errMsg = nullptr,
- const char *sourceFile = nullptr, int sourceLine = 0);
+ const Descriptor &source, int32_t allocIdx, int64_t *stream = nullptr,
+ bool *pinned = nullptr, bool hasStat = false,
+ const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
+ int sourceLine = 0);
/// Perform allocation of the descriptor with synchronization of it when
/// necessary. Assign data from source.
int RTDEF(CUFAllocatableAllocateSourceSync)(Descriptor &alloc,
- const Descriptor &source, int64_t *stream = nullptr, bool *pinned = nullptr,
- bool hasStat = false, const Descriptor *errMsg = nullptr,
- const char *sourceFile = nullptr, int sourceLine = 0);
+ const Descriptor &source, int32_t allocIdx, int64_t *stream = nullptr,
+ bool *pinned = nullptr, bool hasStat = false,
+ const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
+ int sourceLine = 0);
/// Perform deallocation of the descriptor with synchronization of it when
/// necessary.
diff --git a/flang/include/flang/Runtime/CUDA/pointer.h b/flang/include/flang/Runtime/CUDA/pointer.h
index bdfc3268e0814..64698370534ce 100644
--- a/flang/include/flang/Runtime/CUDA/pointer.h
+++ b/flang/include/flang/Runtime/CUDA/pointer.h
@@ -17,31 +17,33 @@ namespace Fortran::runtime::cuda {
extern "C" {
/// Perform allocation of the descriptor.
-int RTDECL(CUFPointerAllocate)(Descriptor &, int64_t *stream = nullptr,
- bool *pinned = nullptr, bool hasStat = false,
+int RTDECL(CUFPointerAllocate)(Descriptor &, int32_t allocIdx,
+ int64_t *stream = nullptr, bool *pinned = nullptr, bool hasStat = false,
const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
int sourceLine = 0);
/// Perform allocation of the descriptor with synchronization of it when
/// necessary.
-int RTDECL(CUFPointerAllocateSync)(Descriptor &, int64_t *stream = nullptr,
- bool *pinned = nullptr, bool hasStat = false,
+int RTDECL(CUFPointerAllocateSync)(Descriptor &, int32_t allocIdx,
+ int64_t *stream = nullptr, bool *pinned = nullptr, bool hasStat = false,
const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
int sourceLine = 0);
/// Perform allocation of the descriptor without synchronization. Assign data
/// from source.
int RTDEF(CUFPointerAllocateSource)(Descriptor &pointer,
- const Descriptor &source, int64_t *stream = nullptr, bool *pinned = nullptr,
- bool hasStat = false, const Descriptor *errMsg = nullptr,
- const char *sourceFile = nullptr, int sourceLine = 0);
+ const Descriptor &source, int32_t allocIdx, int64_t *stream = nullptr,
+ bool *pinned = nullptr, bool hasStat = false,
+ const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
+ int sourceLine = 0);
/// Perform allocation of the descriptor with synchronization of it when
/// necessary. Assign data from source.
int RTDEF(CUFPointerAllocateSourceSync)(Descriptor &pointer,
- const Descriptor &source, int64_t *stream = nullptr, bool *pinned = nullptr,
- bool hasStat = false, const Descriptor *errMsg = nullptr,
- const char *sourceFile = nullptr, int sourceLine = 0);
+ const Descriptor &source, int32_t allocIdx, int64_t *stream = nullptr,
+ bool *pinned = nullptr, bool hasStat = false,
+ const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
+ int sourceLine = 0);
} // extern "C"
diff --git a/flang/lib/Lower/CUDA.cpp b/flang/lib/Lower/CUDA.cpp
index 1293d2c5bd3ae..5bb0a11e4fa56 100644
--- a/flang/lib/Lower/CUDA.cpp
+++ b/flang/lib/Lower/CUDA.cpp
@@ -165,3 +165,20 @@ bool Fortran::lower::isTransferWithConversion(mlir::Value rhs) {
return true;
return false;
}
+
+unsigned
+Fortran::lower::getAllocatorIdx(const Fortran::semantics::Symbol &sym) {
+ std::optional<Fortran::common::CUDADataAttr> cudaAttr =
+ Fortran::semantics::GetCUDADataAttr(&sym.GetUltimate());
+ if (cudaAttr) {
+ if (*cudaAttr == Fortran::common::CUDADataAttr::Pinned)
+ return kPinnedAllocatorPos;
+ if (*cudaAttr == Fortran::common::CUDADataAttr::Device)
+ return kDeviceAllocatorPos;
+ if (*cudaAttr == Fortran::common::CUDADataAttr::Managed)
+ return kManagedAllocatorPos;
+ if (*cudaAttr == Fortran::common::CUDADataAttr::Unified)
+ return kUnifiedAllocatorPos;
+ }
+ return kDefaultAllocator;
+}
diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp
index 80af7f4c1aaad..6e9518a0f3349 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -478,20 +478,6 @@ createGlobalInitialization(fir::FirOpBuilder &builder, fir::GlobalOp global,
builder.restoreInsertionPoint(insertPt);
}
-static unsigned getAllocatorIdxFromDataAttr(cuf::DataAttributeAttr dataAttr) {
- if (dataAttr) {
- if (dataAttr.getValue() == cuf::DataAttribute::Pinned)
- return kPinnedAllocatorPos;
- if (dataAttr.getValue() == cuf::DataAttribute::Device)
- return kDeviceAllocatorPos;
- if (dataAttr.getValue() == cuf::DataAttribute::Managed)
- return kManagedAllocatorPos;
- if (dataAttr.getValue() == cuf::DataAttribute::Unified)
- return kUnifiedAllocatorPos;
- }
- return kDefaultAllocator;
-}
-
/// Create the global op and its init if it has one
fir::GlobalOp Fortran::lower::defineGlobal(
Fortran::lower::AbstractConverter &converter,
@@ -554,7 +540,9 @@ fir::GlobalOp Fortran::lower::defineGlobal(
mlir::Value box = fir::factory::createUnallocatedBox(
b, loc, symTy,
/*nonDeferredParams=*/{},
- /*typeSourceBox=*/{}, getAllocatorIdxFromDataAttr(dataAttr));
+ /*typeSourceBox=*/{},
+ dataAttr ? cuf::getAllocatorIdx(dataAttr.getValue())
+ : kDefaultAllocator);
fir::HasValueOp::create(b, loc, box);
});
}
diff --git a/flang/lib/Optimizer/Dialect/CUF/Attributes/CUFAttr.cpp b/flang/lib/Optimizer/Dialect/CUF/Attributes/CUFAttr.cpp
index bd0499f406c18..fd5dd555c04cd 100644
--- a/flang/lib/Optimizer/Dialect/CUF/Attributes/CUFAttr.cpp
+++ b/flang/lib/Optimizer/Dialect/CUF/Attributes/CUFAttr.cpp
@@ -12,6 +12,7 @@
#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"
#include "flang/Optimizer/Dialect/CUF/CUFDialect.h"
+#include "flang/Runtime/allocator-registry-consts.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/DialectImplementation.h"
@@ -52,4 +53,16 @@ bool hasDataAttr(mlir::Operation *op, cuf::DataAttribute value) {
return false;
}
+unsigned getAllocatorIdx(cuf::DataAttribute dataAttr) {
+ if (dataAttr == cuf::DataAttribute::Pinned)
+ return kPinnedAllocatorPos;
+ if (dataAttr == cuf::DataAttribute::Device)
+ return kDeviceAllocatorPos;
+ if (dataAttr == cuf::DataAttribute::Managed)
+ return kManagedAllocatorPos;
+ if (dataAttr == cuf::DataAttribute::Unified)
+ return kUnifiedAllocatorPos;
+ return kDefaultAllocator;
+}
+
} // namespace cuf
diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
index 9834b0499b930..9021c5d982321 100644
--- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
@@ -106,7 +106,7 @@ static mlir::LogicalResult convertOpToCall(OpTy op,
mlir::Value sourceLine;
if constexpr (std::is_same_v<OpTy, cuf::AllocateOp>)
sourceLine = fir::factory::locationToLineNo(
- builder, loc, op.getSource() ? fTy.getInput(7) : fTy.getInput(6));
+ builder, loc, op.getSource() ? fTy.getInput(8) : fTy.getInput(7));
else
sourceLine = fir::factory::locationToLineNo(builder, loc, fTy.getInput(4));
@@ -122,6 +122,8 @@ static mlir::LogicalResult convertOpToCall(OpTy op,
}
llvm::SmallVector<mlir::Value> args;
if constexpr (std::is_same_v<OpTy, cuf::AllocateOp>) {
+ mlir::Value allocIdx = builder.createIntegerConstant(
+ loc, builder.getI32Type(), cuf::getAllocatorIdx(op.getDataAttr()));
mlir::Value pinned =
op.getPinned()
? op.getPinned()
@@ -133,15 +135,15 @@ static mlir::LogicalResult convertOpToCall(OpTy op,
op.getStream() ? op.getStream()
: builder.createNullConstant(loc, fTy.getInput(2));
args = fir::runtime::createArguments(
- builder, loc, fTy, op.getBox(), op.getSource(), stream, pinned,
- hasStat, errmsg, sourceFile, sourceLine);
+ builder, loc, fTy, op.getBox(), op.getSource(), allocIdx, stream,
+ pinned, hasStat, errmsg, sourceFile, sourceLine);
} else {
mlir::Value stream =
op.getStream() ? op.getStream()
: builder.createNullConstant(loc, fTy.getInput(1));
args = fir::runtime::createArguments(builder, loc, fTy, op.getBox(),
- stream, pinned, hasStat, errmsg,
- sourceFile, sourceLine);
+ allocIdx, stream, pinned, hasStat,
+ errmsg, sourceFile, sourceLine);
}
} else {
args =
diff --git a/flang/test/Fir/CUDA/cuda-allocate.fir b/flang/test/Fir/CUDA/cuda-allocate.fir
index ea7890c9aac52..799d9991dfa83 100644
--- a/flang/test/Fir/CUDA/cuda-allocate.fir
+++ b/flang/test/Fir/CUDA/cuda-allocate.fir
@@ -19,7 +19,7 @@ func.func @_QPsub1() {
// CHECK: %[[DESC:.*]] = fir.convert %[[DESC_RT_CALL]] : (!fir.ref<!fir.box<none>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
// CHECK: %[[DECL_DESC:.*]]:2 = hlfir.declare %[[DESC]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
// CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DECL_DESC]]#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
-// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocate(%[[BOX_NONE]], %{{....
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/157189
More information about the llvm-commits mailing list