[flang-commits] [flang] [flang][cuda] Handle simple device pointer allocation (PR #123996)

Wed Jan 22 11:59:17 PST 2025

llvmbot wrote:




@llvm/pr-subscribers-flang-fir-hlfir

Author: Valentin Clement (バレンタイン クレメン) (clementval)

<details>
<summary>Changes</summary>

Allocation of a Fortran pointer must use the flang runtime's pointer entry points and not the ones for allocatables; otherwise the checks made by the runtime will fail.

Support for pointer allocation with a source and with double descriptors will follow.

---
Full diff: https://github.com/llvm/llvm-project/pull/123996.diff


5 Files Affected:

- (added) flang/include/flang/Runtime/CUDA/pointer.h (+27) 
- (modified) flang/lib/Optimizer/Transforms/CUFOpConversion.cpp (+23-4) 
- (modified) flang/runtime/CUDA/CMakeLists.txt (+1) 
- (added) flang/runtime/CUDA/pointer.cpp (+40) 
- (modified) flang/test/Fir/CUDA/cuda-allocate.fir (+11) 


``````````diff

diff --git a/flang/include/flang/Runtime/CUDA/pointer.h b/flang/include/flang/Runtime/CUDA/pointer.h
new file mode 100644
index 00000000000000..db5242696303f5
--- /dev/null
+++ b/flang/include/flang/Runtime/CUDA/pointer.h
@@ -0,0 +1,27 @@
+//===-- include/flang/Runtime/CUDA/pointer.h --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_RUNTIME_CUDA_POINTER_H_
+#define FORTRAN_RUNTIME_CUDA_POINTER_H_
+
+#include "flang/Runtime/descriptor-consts.h"
+#include "flang/Runtime/entry-names.h"
+
+namespace Fortran::runtime::cuda {
+
+extern "C" {
+
+/// Perform allocation of the descriptor.
+int RTDECL(CUFPointerAllocate)(Descriptor &, int64_t stream = -1,
+    bool hasStat = false, const Descriptor *errMsg = nullptr,
+    const char *sourceFile = nullptr, int sourceLine = 0);
+
+} // extern "C"
+
+} // namespace Fortran::runtime::cuda
+#endif // FORTRAN_RUNTIME_CUDA_POINTER_H_
diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
index 8b8c00fa7ecfcb..23248f6d12622a 100644
--- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
@@ -20,6 +20,7 @@
 #include "flang/Runtime/CUDA/common.h"
 #include "flang/Runtime/CUDA/descriptor.h"
 #include "flang/Runtime/CUDA/memory.h"
+#include "flang/Runtime/CUDA/pointer.h"
 #include "flang/Runtime/allocatable.h"
 #include "mlir/Conversion/LLVMCommon/Pattern.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
@@ -161,7 +162,18 @@ struct CUFAllocateOpConversion
     fir::FirOpBuilder builder(rewriter, mod);
     mlir::Location loc = op.getLoc();
 
+    bool isPointer = false;
+
+    if (auto declareOp =
+            mlir::dyn_cast_or_null<fir::DeclareOp>(op.getBox().getDefiningOp()))
+      if (declareOp.getFortranAttrs() &&
+          bitEnumContainsAny(*declareOp.getFortranAttrs(),
+                             fir::FortranVariableFlagsEnum::pointer))
+        isPointer = true;
+
     if (hasDoubleDescriptors(op)) {
+      if (isPointer)
+        TODO(loc, "pointer allocation with double descriptors");
       // Allocation for module variable are done with custom runtime entry point
       // so the descriptors can be synchronized.
       mlir::func::FuncOp func;
@@ -176,13 +188,20 @@ struct CUFAllocateOpConversion
     }
 
     mlir::func::FuncOp func;
-    if (op.getSource())
+    if (op.getSource()) {
+      if (isPointer)
+        TODO(loc, "pointer allocation with source");
       func =
           fir::runtime::getRuntimeFunc<mkRTKey(CUFAllocatableAllocateSource)>(
               loc, builder);
-    else
-      func = fir::runtime::getRuntimeFunc<mkRTKey(CUFAllocatableAllocate)>(
-          loc, builder);
+    } else {
+      func =
+          isPointer
+              ? fir::runtime::getRuntimeFunc<mkRTKey(CUFPointerAllocate)>(
+                    loc, builder)
+              : fir::runtime::getRuntimeFunc<mkRTKey(CUFAllocatableAllocate)>(
+                    loc, builder);
+    }
 
     return convertOpToCall<cuf::AllocateOp>(op, rewriter, func);
   }
diff --git a/flang/runtime/CUDA/CMakeLists.txt b/flang/runtime/CUDA/CMakeLists.txt
index 3a88824826de31..23e01da72eded1 100644
--- a/flang/runtime/CUDA/CMakeLists.txt
+++ b/flang/runtime/CUDA/CMakeLists.txt
@@ -20,6 +20,7 @@ add_flang_library(${CUFRT_LIBNAME}
   kernel.cpp
   memmove-function.cpp
   memory.cpp
+  pointer.cpp
   registration.cpp
 )
 
diff --git a/flang/runtime/CUDA/pointer.cpp b/flang/runtime/CUDA/pointer.cpp
new file mode 100644
index 00000000000000..0c5d3a5a6297d8
--- /dev/null
+++ b/flang/runtime/CUDA/pointer.cpp
@@ -0,0 +1,40 @@
+//===-- runtime/CUDA/pointer.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Runtime/CUDA/pointer.h"
+#include "../stat.h"
+#include "../terminator.h"
+#include "flang/Runtime/pointer.h"
+
+#include "cuda_runtime.h"
+
+namespace Fortran::runtime::cuda {
+
+extern "C" {
+RT_EXT_API_GROUP_BEGIN
+
+int RTDEF(CUFPointerAllocate)(Descriptor &desc, int64_t stream, bool hasStat,
+    const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
+  if (desc.HasAddendum()) {
+    Terminator terminator{sourceFile, sourceLine};
+    // TODO: This require a bit more work to set the correct type descriptor
+    // address
+    terminator.Crash(
+        "not yet implemented: CUDA descriptor allocation with addendum");
+  }
+  // Perform the standard allocation.
+  int stat{
+      RTNAME(PointerAllocate)(desc, hasStat, errMsg, sourceFile, sourceLine)};
+  return stat;
+}
+
+RT_EXT_API_GROUP_END
+
+} // extern "C"
+
+} // namespace Fortran::runtime::cuda
diff --git a/flang/test/Fir/CUDA/cuda-allocate.fir b/flang/test/Fir/CUDA/cuda-allocate.fir
index 35c6e2a77a697d..2ac9498d355414 100644
--- a/flang/test/Fir/CUDA/cuda-allocate.fir
+++ b/flang/test/Fir/CUDA/cuda-allocate.fir
@@ -181,4 +181,15 @@ func.func @_QQallocate_stream() {
 // CHECK: %[[STREAM_LOAD:.*]] = fir.load %[[STREAM]] : !fir.ref<i64>
 // CHECK: fir.call @_FortranACUFAllocatableAllocate(%{{.*}}, %[[STREAM_LOAD]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
 
+
+func.func @_QPp_alloc() {
+  %0 = cuf.alloc !fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>> {bindc_name = "complex_array", data_attr = #cuf.cuda<device>, uniq_name = "_QFp_allocEcomplex_array"} -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>>>
+  %4 = fir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFp_allocEcomplex_array"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>>>
+  %9 = cuf.allocate %4 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>>> {data_attr = #cuf.cuda<device>} -> i32
+  return
+}
+
+// CHECK-LABEL: func.func @_QPp_alloc()
+// CHECK: fir.call @_FortranACUFPointerAllocate
+
 } // end of module

``````````

</details>


https://github.com/llvm/llvm-project/pull/123996