[flang-commits] [flang] 6e498bc - [flang][cuda] Handle simple device pointer allocation (#123996)
via flang-commits
flang-commits at lists.llvm.org
Wed Jan 22 15:59:36 PST 2025
Author: Valentin Clement (バレンタイン クレメン)
Date: 2025-01-22T15:59:32-08:00
New Revision: 6e498bc2cd765f4c421d32d610bdc0effec62b42
URL: https://github.com/llvm/llvm-project/commit/6e498bc2cd765f4c421d32d610bdc0effec62b42
DIFF: https://github.com/llvm/llvm-project/commit/6e498bc2cd765f4c421d32d610bdc0effec62b42.diff
LOG: [flang][cuda] Handle simple device pointer allocation (#123996)
Added:
flang/include/flang/Runtime/CUDA/pointer.h
flang/runtime/CUDA/pointer.cpp
Modified:
flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
flang/runtime/CUDA/CMakeLists.txt
flang/test/Fir/CUDA/cuda-allocate.fir
Removed:
################################################################################
diff --git a/flang/include/flang/Runtime/CUDA/pointer.h b/flang/include/flang/Runtime/CUDA/pointer.h
new file mode 100644
index 00000000000000..db5242696303f5
--- /dev/null
+++ b/flang/include/flang/Runtime/CUDA/pointer.h
@@ -0,0 +1,27 @@
+//===-- include/flang/Runtime/CUDA/pointer.h --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_RUNTIME_CUDA_POINTER_H_
+#define FORTRAN_RUNTIME_CUDA_POINTER_H_
+
+#include "flang/Runtime/descriptor-consts.h"
+#include "flang/Runtime/entry-names.h"
+
+namespace Fortran::runtime::cuda {
+
+extern "C" {
+
+/// Allocate the pointer described by the descriptor and return the status.
+int RTDECL(CUFPointerAllocate)(Descriptor &, int64_t stream = -1,
+ bool hasStat = false, const Descriptor *errMsg = nullptr,
+ const char *sourceFile = nullptr, int sourceLine = 0);
+
+} // extern "C"
+
+} // namespace Fortran::runtime::cuda
+#endif // FORTRAN_RUNTIME_CUDA_POINTER_H_
diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
index 8b8c00fa7ecfcb..23248f6d12622a 100644
--- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
@@ -20,6 +20,7 @@
#include "flang/Runtime/CUDA/common.h"
#include "flang/Runtime/CUDA/descriptor.h"
#include "flang/Runtime/CUDA/memory.h"
+#include "flang/Runtime/CUDA/pointer.h"
#include "flang/Runtime/allocatable.h"
#include "mlir/Conversion/LLVMCommon/Pattern.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
@@ -161,7 +162,18 @@ struct CUFAllocateOpConversion
fir::FirOpBuilder builder(rewriter, mod);
mlir::Location loc = op.getLoc();
+ bool isPointer = false;
+
+ if (auto declareOp =
+ mlir::dyn_cast_or_null<fir::DeclareOp>(op.getBox().getDefiningOp()))
+ if (declareOp.getFortranAttrs() &&
+ bitEnumContainsAny(*declareOp.getFortranAttrs(),
+ fir::FortranVariableFlagsEnum::pointer))
+ isPointer = true;
+
if (hasDoubleDescriptors(op)) {
+ if (isPointer)
+ TODO(loc, "pointer allocation with double descriptors");
// Allocation for module variable are done with custom runtime entry point
// so the descriptors can be synchronized.
mlir::func::FuncOp func;
@@ -176,13 +188,20 @@ struct CUFAllocateOpConversion
}
mlir::func::FuncOp func;
- if (op.getSource())
+ if (op.getSource()) {
+ if (isPointer)
+ TODO(loc, "pointer allocation with source");
func =
fir::runtime::getRuntimeFunc<mkRTKey(CUFAllocatableAllocateSource)>(
loc, builder);
- else
- func = fir::runtime::getRuntimeFunc<mkRTKey(CUFAllocatableAllocate)>(
- loc, builder);
+ } else {
+ func =
+ isPointer
+ ? fir::runtime::getRuntimeFunc<mkRTKey(CUFPointerAllocate)>(
+ loc, builder)
+ : fir::runtime::getRuntimeFunc<mkRTKey(CUFAllocatableAllocate)>(
+ loc, builder);
+ }
return convertOpToCall<cuf::AllocateOp>(op, rewriter, func);
}
diff --git a/flang/runtime/CUDA/CMakeLists.txt b/flang/runtime/CUDA/CMakeLists.txt
index 3a88824826de31..23e01da72eded1 100644
--- a/flang/runtime/CUDA/CMakeLists.txt
+++ b/flang/runtime/CUDA/CMakeLists.txt
@@ -20,6 +20,7 @@ add_flang_library(${CUFRT_LIBNAME}
kernel.cpp
memmove-function.cpp
memory.cpp
+ pointer.cpp
registration.cpp
)
diff --git a/flang/runtime/CUDA/pointer.cpp b/flang/runtime/CUDA/pointer.cpp
new file mode 100644
index 00000000000000..0c5d3a5a6297d8
--- /dev/null
+++ b/flang/runtime/CUDA/pointer.cpp
@@ -0,0 +1,40 @@
+//===-- runtime/CUDA/pointer.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Runtime/CUDA/pointer.h"
+#include "../stat.h"
+#include "../terminator.h"
+#include "flang/Runtime/pointer.h"
+
+#include "cuda_runtime.h"
+
+namespace Fortran::runtime::cuda {
+
+extern "C" {
+RT_EXT_API_GROUP_BEGIN
+
+int RTDEF(CUFPointerAllocate)(Descriptor &desc, int64_t stream, bool hasStat,
+ const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
+ if (desc.HasAddendum()) {
+ Terminator terminator{sourceFile, sourceLine};
+ // TODO: This requires a bit more work to set the correct type descriptor
+ // address.
+ terminator.Crash(
+ "not yet implemented: CUDA descriptor allocation with addendum");
+ }
+ // Forward to the standard PointerAllocate entry point; `stream` is unused.
+ int stat{
+ RTNAME(PointerAllocate)(desc, hasStat, errMsg, sourceFile, sourceLine)};
+ return stat;
+}
+
+RT_EXT_API_GROUP_END
+
+} // extern "C"
+
+} // namespace Fortran::runtime::cuda
diff --git a/flang/test/Fir/CUDA/cuda-allocate.fir b/flang/test/Fir/CUDA/cuda-allocate.fir
index 35c6e2a77a697d..2ac9498d355414 100644
--- a/flang/test/Fir/CUDA/cuda-allocate.fir
+++ b/flang/test/Fir/CUDA/cuda-allocate.fir
@@ -181,4 +181,15 @@ func.func @_QQallocate_stream() {
// CHECK: %[[STREAM_LOAD:.*]] = fir.load %[[STREAM]] : !fir.ref<i64>
// CHECK: fir.call @_FortranACUFAllocatableAllocate(%{{.*}}, %[[STREAM_LOAD]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+
+func.func @_QPp_alloc() {
+ %0 = cuf.alloc !fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>> {bindc_name = "complex_array", data_attr = #cuf.cuda<device>, uniq_name = "_QFp_allocEcomplex_array"} -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>>>
+ %4 = fir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFp_allocEcomplex_array"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>>>
+ %9 = cuf.allocate %4 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>>> {data_attr = #cuf.cuda<device>} -> i32
+ return
+}
+
+// CHECK-LABEL: func.func @_QPp_alloc()
+// CHECK: fir.call @_FortranACUFPointerAllocate
+
} // end of module
More information about the flang-commits
mailing list