[flang-commits] [flang] [flang][cuda] Add function to allocate and deallocate device module variable (PR #109213)
Valentin Clement バレンタイン クレメン via flang-commits
flang-commits at lists.llvm.org
Wed Sep 18 17:44:27 PDT 2024
https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/109213
>From 9fb92e2c8e36fbe5242b89a871637b11eba31ce8 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Tue, 17 Sep 2024 14:16:36 -0700
Subject: [PATCH 1/6] [flang][cuda] Add function to allocate and deallocate
device module variable
---
.../include/flang/Runtime/CUDA/allocatable.h | 34 +++++++++
flang/include/flang/Runtime/CUDA/allocator.h | 11 ---
flang/include/flang/Runtime/CUDA/common.h | 30 ++++++++
flang/include/flang/Runtime/CUDA/descriptor.h | 13 +++-
flang/include/flang/Runtime/CUDA/memory.h | 4 --
.../Optimizer/Transforms/CufOpConversion.cpp | 1 +
flang/runtime/CUDA/CMakeLists.txt | 4 ++
flang/runtime/CUDA/allocatable.cpp | 69 +++++++++++++++++++
flang/runtime/CUDA/allocator.cpp | 1 +
flang/runtime/CUDA/descriptor.cpp | 22 ++++++
flang/unittests/Runtime/CUDA/Allocatable.cpp | 60 ++++++++++++++++
flang/unittests/Runtime/CUDA/CMakeLists.txt | 8 +++
12 files changed, 240 insertions(+), 17 deletions(-)
create mode 100644 flang/include/flang/Runtime/CUDA/allocatable.h
create mode 100644 flang/include/flang/Runtime/CUDA/common.h
create mode 100644 flang/runtime/CUDA/allocatable.cpp
create mode 100644 flang/unittests/Runtime/CUDA/Allocatable.cpp
diff --git a/flang/include/flang/Runtime/CUDA/allocatable.h b/flang/include/flang/Runtime/CUDA/allocatable.h
new file mode 100644
index 00000000000000..e986ad910a3f3a
--- /dev/null
+++ b/flang/include/flang/Runtime/CUDA/allocatable.h
@@ -0,0 +1,34 @@
+//===-- include/flang/Runtime/CUDA/allocatable.h ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_RUNTIME_CUDA_ALLOCATABLE_H_
+#define FORTRAN_RUNTIME_CUDA_ALLOCATABLE_H_
+
+#include "flang/Runtime/descriptor.h"
+#include "flang/Runtime/entry-names.h"
+
+namespace Fortran::runtime::cuda {
+
+extern "C" {
+
+/// Perform allocation of the descriptor with synchronization of it when
+/// necessary.
+int RTDECL(CUFAllocatableAllocate)(Descriptor &, bool hasStat = false,
+ const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
+ int sourceLine = 0);
+
+/// Perform deallocation of the descriptor with synchronization of it when
+/// necessary.
+int RTDECL(CUFAllocatableDeallocate)(Descriptor &, bool hasStat = false,
+ const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
+ int sourceLine = 0);
+
+} // extern "C"
+
+} // namespace Fortran::runtime::cuda
+#endif // FORTRAN_RUNTIME_CUDA_ALLOCATABLE_H_
diff --git a/flang/include/flang/Runtime/CUDA/allocator.h b/flang/include/flang/Runtime/CUDA/allocator.h
index 4527c9f18fa054..06bda81c6f75ad 100644
--- a/flang/include/flang/Runtime/CUDA/allocator.h
+++ b/flang/include/flang/Runtime/CUDA/allocator.h
@@ -12,17 +12,6 @@
#include "flang/Runtime/descriptor.h"
#include "flang/Runtime/entry-names.h"
-#define CUDA_REPORT_IF_ERROR(expr) \
- [](cudaError_t err) { \
- if (err == cudaSuccess) \
- return; \
- const char *name = cudaGetErrorName(err); \
- if (!name) \
- name = "<unknown>"; \
- Terminator terminator{__FILE__, __LINE__}; \
- terminator.Crash("'%s' failed with '%s'", #expr, name); \
- }(expr)
-
namespace Fortran::runtime::cuda {
extern "C" {
diff --git a/flang/include/flang/Runtime/CUDA/common.h b/flang/include/flang/Runtime/CUDA/common.h
new file mode 100644
index 00000000000000..cb8681da161f0d
--- /dev/null
+++ b/flang/include/flang/Runtime/CUDA/common.h
@@ -0,0 +1,30 @@
+//===-- include/flang/Runtime/CUDA/common.h ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_RUNTIME_CUDA_COMMON_H_
+#define FORTRAN_RUNTIME_CUDA_COMMON_H_
+
+#include "flang/Runtime/descriptor.h"
+#include "flang/Runtime/entry-names.h"
+
+static constexpr unsigned kHostToDevice = 0;
+static constexpr unsigned kDeviceToHost = 1;
+static constexpr unsigned kDeviceToDevice = 2;
+
+#define CUDA_REPORT_IF_ERROR(expr) \
+ [](cudaError_t err) { \
+ if (err == cudaSuccess) \
+ return; \
+ const char *name = cudaGetErrorName(err); \
+ if (!name) \
+ name = "<unknown>"; \
+ Terminator terminator{__FILE__, __LINE__}; \
+ terminator.Crash("'%s' failed with '%s'", #expr, name); \
+ }(expr)
+
+#endif // FORTRAN_RUNTIME_CUDA_COMMON_H_
diff --git a/flang/include/flang/Runtime/CUDA/descriptor.h b/flang/include/flang/Runtime/CUDA/descriptor.h
index d593989420420f..7b870c74cd7adb 100644
--- a/flang/include/flang/Runtime/CUDA/descriptor.h
+++ b/flang/include/flang/Runtime/CUDA/descriptor.h
@@ -17,14 +17,23 @@ namespace Fortran::runtime::cuda {
extern "C" {
-// Allocate a descriptor in managed.
+/// Allocate a descriptor in managed.
Descriptor *RTDECL(CUFAllocDesciptor)(
std::size_t, const char *sourceFile = nullptr, int sourceLine = 0);
-// Deallocate a descriptor allocated in managed or unified memory.
+/// Deallocate a descriptor allocated in managed or unified memory.
void RTDECL(CUFFreeDesciptor)(
Descriptor *, const char *sourceFile = nullptr, int sourceLine = 0);
+/// Retrieve the device descriptor's pointer from the host one.
+Descriptor *RTDECL(CUFGetDeviceDescAddress)(
+ Descriptor &, const char *sourceFile = nullptr, int sourceLine = 0);
+
+/// Sync the \p src descriptor to the \p dst descriptor.
+void RTDECL(CUFDescriptorSync)(Descriptor *dst, const Descriptor *src,
+ const char *sourceFile = nullptr, int sourceLine = 0);
+
} // extern "C"
+
} // namespace Fortran::runtime::cuda
#endif // FORTRAN_RUNTIME_CUDA_DESCRIPTOR_H_
diff --git a/flang/include/flang/Runtime/CUDA/memory.h b/flang/include/flang/Runtime/CUDA/memory.h
index 8fd51129e81fe0..33947248dc4831 100644
--- a/flang/include/flang/Runtime/CUDA/memory.h
+++ b/flang/include/flang/Runtime/CUDA/memory.h
@@ -13,10 +13,6 @@
#include "flang/Runtime/entry-names.h"
#include <cstddef>
-static constexpr unsigned kHostToDevice = 0;
-static constexpr unsigned kDeviceToHost = 1;
-static constexpr unsigned kDeviceToDevice = 2;
-
namespace Fortran::runtime::cuda {
extern "C" {
diff --git a/flang/lib/Optimizer/Transforms/CufOpConversion.cpp b/flang/lib/Optimizer/Transforms/CufOpConversion.cpp
index 03a1eb74343b43..2dc37f4df3aeec 100644
--- a/flang/lib/Optimizer/Transforms/CufOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CufOpConversion.cpp
@@ -14,6 +14,7 @@
#include "flang/Optimizer/Dialect/FIROps.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Optimizer/Support/DataLayout.h"
+#include "flang/Runtime/CUDA/common.h"
#include "flang/Runtime/CUDA/descriptor.h"
#include "flang/Runtime/CUDA/memory.h"
#include "flang/Runtime/allocatable.h"
diff --git a/flang/runtime/CUDA/CMakeLists.txt b/flang/runtime/CUDA/CMakeLists.txt
index 490bb369b572f6..803ff01b945dc4 100644
--- a/flang/runtime/CUDA/CMakeLists.txt
+++ b/flang/runtime/CUDA/CMakeLists.txt
@@ -15,8 +15,12 @@ set(CUFRT_LIBNAME CufRuntime_cuda_${CUDAToolkit_VERSION_MAJOR})
add_flang_library(${CUFRT_LIBNAME}
allocator.cpp
+ allocatable.cpp
descriptor.cpp
memory.cpp
+
+ LINK_COMPONENTS
+ Support
)
if (BUILD_SHARED_LIBS)
diff --git a/flang/runtime/CUDA/allocatable.cpp b/flang/runtime/CUDA/allocatable.cpp
new file mode 100644
index 00000000000000..3eafadb7842274
--- /dev/null
+++ b/flang/runtime/CUDA/allocatable.cpp
@@ -0,0 +1,69 @@
+//===-- runtime/CUDA/allocatable.cpp --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Runtime/CUDA/allocatable.h"
+#include "../stat.h"
+#include "../terminator.h"
+#include "flang/Runtime/CUDA/common.h"
+#include "flang/Runtime/CUDA/descriptor.h"
+#include "flang/Runtime/allocatable.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#include "cuda_runtime.h"
+
+namespace Fortran::runtime::cuda {
+
+extern "C" {
+RT_EXT_API_GROUP_BEGIN
+
+int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, bool hasStat,
+ const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
+ if (desc.HasAddendum()) {
+ Terminator terminator{sourceFile, sourceLine};
+ // TODO: This require a bit more work to set the correct type descriptor
+ // address
+ terminator.Crash(
+ "not yet implemented: CUDA descriptor allocation with addendum");
+ }
+ // Perform the standard allocation.
+ int stat{RTNAME(AllocatableAllocate)(
+ desc, hasStat, errMsg, sourceFile, sourceLine)};
+#ifndef RT_DEVICE_COMPILATION
+ // Descriptor synchronization is only done when the allocation is done
+ // from the host.
+ if (stat == StatOk) {
+ Descriptor *deviceAddr{
+ RTNAME(CUFGetDeviceDescAddress)(desc, sourceFile, sourceLine)};
+ RTDECL(CUFDescriptorSync)(deviceAddr, &desc, sourceFile, sourceLine);
+ }
+#endif
+ return stat;
+}
+
+int RTDEF(CUFAllocatableDeallocate)(Descriptor &desc, bool hasStat,
+ const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
+ // Perform the standard allocation.
+ int stat{RTNAME(AllocatableDeallocate)(
+ desc, hasStat, errMsg, sourceFile, sourceLine)};
+#ifndef RT_DEVICE_COMPILATION
+ // Descriptor synchronization is only done when the deallocation is done
+ // from the host.
+ if (stat == StatOk) {
+ Descriptor *deviceAddr{
+ RTNAME(CUFGetDeviceDescAddress)(desc, sourceFile, sourceLine)};
+ RTDECL(CUFDescriptorSync)(deviceAddr, &desc, sourceFile, sourceLine);
+ }
+#endif
+ return stat;
+}
+
+RT_EXT_API_GROUP_END
+
+} // extern "C"
+
+} // namespace Fortran::runtime::cuda
diff --git a/flang/runtime/CUDA/allocator.cpp b/flang/runtime/CUDA/allocator.cpp
index d4a473d58e86cd..85b3daf65a8ba4 100644
--- a/flang/runtime/CUDA/allocator.cpp
+++ b/flang/runtime/CUDA/allocator.cpp
@@ -13,6 +13,7 @@
#include "../type-info.h"
#include "flang/Common/Fortran.h"
#include "flang/ISO_Fortran_binding_wrapper.h"
+#include "flang/Runtime/CUDA/common.h"
#include "flang/Runtime/allocator-registry.h"
#include "cuda_runtime.h"
diff --git a/flang/runtime/CUDA/descriptor.cpp b/flang/runtime/CUDA/descriptor.cpp
index 1031b1e601b646..3eec0135b3d883 100644
--- a/flang/runtime/CUDA/descriptor.cpp
+++ b/flang/runtime/CUDA/descriptor.cpp
@@ -7,7 +7,11 @@
//===----------------------------------------------------------------------===//
#include "flang/Runtime/CUDA/descriptor.h"
+#include "../terminator.h"
#include "flang/Runtime/CUDA/allocator.h"
+#include "flang/Runtime/CUDA/common.h"
+
+#include "cuda_runtime.h"
namespace Fortran::runtime::cuda {
extern "C" {
@@ -23,6 +27,24 @@ void RTDEF(CUFFreeDesciptor)(
CUFFreeManaged(reinterpret_cast<void *>(desc));
}
+Descriptor *RTDEF(CUFGetDeviceDescAddress)(
+ Descriptor &desc, const char *sourceFile, int sourceLine) {
+ Terminator terminator{sourceFile, sourceLine};
+ void *p;
+ CUDA_REPORT_IF_ERROR(cudaGetSymbolAddress((void **)&p, &desc));
+ if (!p) {
+ terminator.Crash("Could not retrieve symbol's address");
+ }
+ return (Descriptor *)p;
+}
+
+void RTDEF(CUFDescriptorSync)(Descriptor *dst, const Descriptor *src,
+ const char *sourceFile, int sourceLine) {
+ std::size_t count{src->SizeInBytes()};
+ CUDA_REPORT_IF_ERROR(cudaMemcpy(
+ (void *)dst, (const void *)src, count, cudaMemcpyHostToDevice));
+}
+
RT_EXT_API_GROUP_END
}
} // namespace Fortran::runtime::cuda
diff --git a/flang/unittests/Runtime/CUDA/Allocatable.cpp b/flang/unittests/Runtime/CUDA/Allocatable.cpp
new file mode 100644
index 00000000000000..0f7eb27789316c
--- /dev/null
+++ b/flang/unittests/Runtime/CUDA/Allocatable.cpp
@@ -0,0 +1,60 @@
+//===-- flang/unittests/Runtime/Allocatable.cpp ------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Runtime/allocatable.h"
+#include "gtest/gtest.h"
+#include "../../../runtime/terminator.h"
+#include "flang/Common/Fortran.h"
+#include "flang/Runtime/CUDA/allocator.h"
+#include "flang/Runtime/CUDA/common.h"
+#include "flang/Runtime/CUDA/descriptor.h"
+#include "flang/Runtime/allocator-registry.h"
+
+#include "cuda_runtime.h"
+
+using namespace Fortran::runtime;
+using namespace Fortran::runtime::cuda;
+
+static OwningPtr<Descriptor> createAllocatable(
+ Fortran::common::TypeCategory tc, int kind, int rank = 1) {
+ return Descriptor::Create(TypeCode{tc, kind}, kind, nullptr, rank, nullptr,
+ CFI_attribute_allocatable);
+}
+
+TEST(AllocatableCUFTest, SimpleDeviceAllocatable) {
+ using Fortran::common::TypeCategory;
+ RTNAME(CUFRegisterAllocator)();
+ // REAL(4), DEVICE, ALLOCATABLE :: a(:)
+ auto a{createAllocatable(TypeCategory::Real, 4)};
+ a->SetAllocIdx(kDeviceAllocatorPos);
+ EXPECT_EQ((int)kDeviceAllocatorPos, a->GetAllocIdx());
+ EXPECT_FALSE(a->HasAddendum());
+ RTNAME(AllocatableSetBounds)(*a, 0, 1, 10);
+
+ // Emulate a device descriptor for the purpose of unit testing part of the
+ // code.
+ Descriptor *device_desc;
+ CUDA_REPORT_IF_ERROR(cudaMalloc(&device_desc, a->SizeInBytes()));
+
+ RTNAME(AllocatableAllocate)
+ (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
+ EXPECT_TRUE(a->IsAllocated());
+ RTNAME(CUFDescriptorSync)(device_desc, a.get(), __FILE__, __LINE__);
+ cudaDeviceSynchronize();
+
+ EXPECT_EQ(cudaSuccess, cudaGetLastError());
+
+ RTNAME(AllocatableDeallocate)
+ (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
+ EXPECT_FALSE(a->IsAllocated());
+
+ RTNAME(CUFDescriptorSync)(device_desc, a.get(), __FILE__, __LINE__);
+ cudaDeviceSynchronize();
+
+ EXPECT_EQ(cudaSuccess, cudaGetLastError());
+}
diff --git a/flang/unittests/Runtime/CUDA/CMakeLists.txt b/flang/unittests/Runtime/CUDA/CMakeLists.txt
index ed0caece3d15db..30fb8c220233c0 100644
--- a/flang/unittests/Runtime/CUDA/CMakeLists.txt
+++ b/flang/unittests/Runtime/CUDA/CMakeLists.txt
@@ -1,11 +1,19 @@
if (FLANG_CUF_RUNTIME)
add_flang_unittest(FlangCufRuntimeTests
+ Allocatable.cpp
AllocatorCUF.cpp
)
+if (BUILD_SHARED_LIBS)
+ set(CUDA_RT_TARGET CUDA::cudart)
+else()
+ set(CUDA_RT_TARGET CUDA::cudart_static)
+endif()
+
target_link_libraries(FlangCufRuntimeTests
PRIVATE
+ ${CUDA_RT_TARGET}
CufRuntime_cuda_${CUDAToolkit_VERSION_MAJOR}
FortranRuntime
)
>From 3547be80e2f66b7f8ff89ca99bab40873ff9f513 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Wed, 18 Sep 2024 17:21:16 -0700
Subject: [PATCH 2/6] Make GetDeviceAddress more generic
---
flang/include/flang/Runtime/CUDA/descriptor.h | 4 ++--
flang/runtime/CUDA/allocatable.cpp | 14 ++++++++------
flang/runtime/CUDA/descriptor.cpp | 8 ++++----
3 files changed, 14 insertions(+), 12 deletions(-)
diff --git a/flang/include/flang/Runtime/CUDA/descriptor.h b/flang/include/flang/Runtime/CUDA/descriptor.h
index 7b870c74cd7adb..89e9dd2eeb5688 100644
--- a/flang/include/flang/Runtime/CUDA/descriptor.h
+++ b/flang/include/flang/Runtime/CUDA/descriptor.h
@@ -26,8 +26,8 @@ void RTDECL(CUFFreeDesciptor)(
Descriptor *, const char *sourceFile = nullptr, int sourceLine = 0);
/// Retrieve the device descriptor's pointer from the host one.
-Descriptor *RTDECL(CUFGetDeviceDescAddress)(
- Descriptor &, const char *sourceFile = nullptr, int sourceLine = 0);
+void *RTDECL(CUFGetDeviceAddress)(
+ void *hostPtr, const char *sourceFile = nullptr, int sourceLine = 0);
/// Sync the \p src descriptor to the \p dst descriptor.
void RTDECL(CUFDescriptorSync)(Descriptor *dst, const Descriptor *src,
diff --git a/flang/runtime/CUDA/allocatable.cpp b/flang/runtime/CUDA/allocatable.cpp
index 3eafadb7842274..f464d1b561d4a6 100644
--- a/flang/runtime/CUDA/allocatable.cpp
+++ b/flang/runtime/CUDA/allocatable.cpp
@@ -37,9 +37,10 @@ int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, bool hasStat,
// Descriptor synchronization is only done when the allocation is done
// from the host.
if (stat == StatOk) {
- Descriptor *deviceAddr{
- RTNAME(CUFGetDeviceDescAddress)(desc, sourceFile, sourceLine)};
- RTDECL(CUFDescriptorSync)(deviceAddr, &desc, sourceFile, sourceLine);
+ void *deviceAddr{
+ RTNAME(CUFGetDeviceAddress)((void *)&desc, sourceFile, sourceLine)};
+ RTDECL(CUFDescriptorSync)(
+ (Descriptor *)deviceAddr, &desc, sourceFile, sourceLine);
}
#endif
return stat;
@@ -54,9 +55,10 @@ int RTDEF(CUFAllocatableDeallocate)(Descriptor &desc, bool hasStat,
// Descriptor synchronization is only done when the deallocation is done
// from the host.
if (stat == StatOk) {
- Descriptor *deviceAddr{
- RTNAME(CUFGetDeviceDescAddress)(desc, sourceFile, sourceLine)};
- RTDECL(CUFDescriptorSync)(deviceAddr, &desc, sourceFile, sourceLine);
+ void *deviceAddr{
+ RTNAME(CUFGetDeviceAddress)((void *)&desc, sourceFile, sourceLine)};
+ RTDECL(CUFDescriptorSync)(
+ (Descriptor *)deviceAddr, &desc, sourceFile, sourceLine);
}
#endif
return stat;
diff --git a/flang/runtime/CUDA/descriptor.cpp b/flang/runtime/CUDA/descriptor.cpp
index 3eec0135b3d883..7ce1429cd94d4a 100644
--- a/flang/runtime/CUDA/descriptor.cpp
+++ b/flang/runtime/CUDA/descriptor.cpp
@@ -27,15 +27,15 @@ void RTDEF(CUFFreeDesciptor)(
CUFFreeManaged(reinterpret_cast<void *>(desc));
}
-Descriptor *RTDEF(CUFGetDeviceDescAddress)(
- Descriptor &desc, const char *sourceFile, int sourceLine) {
+void *RTDEF(CUFGetDeviceAddress)(
+ void *hostPtr, const char *sourceFile, int sourceLine) {
Terminator terminator{sourceFile, sourceLine};
void *p;
- CUDA_REPORT_IF_ERROR(cudaGetSymbolAddress((void **)&p, &desc));
+ CUDA_REPORT_IF_ERROR(cudaGetSymbolAddress((void **)&p, hostPtr));
if (!p) {
terminator.Crash("Could not retrieve symbol's address");
}
- return (Descriptor *)p;
+ return p;
}
void RTDEF(CUFDescriptorSync)(Descriptor *dst, const Descriptor *src,
>From a56f0e91c15dea4bfc37799ee68b0a975ef1250e Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Wed, 18 Sep 2024 17:31:38 -0700
Subject: [PATCH 3/6] Fix call with RTDECL -> RTNAME
---
flang/runtime/CUDA/allocatable.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/flang/runtime/CUDA/allocatable.cpp b/flang/runtime/CUDA/allocatable.cpp
index f464d1b561d4a6..3236a99bea3343 100644
--- a/flang/runtime/CUDA/allocatable.cpp
+++ b/flang/runtime/CUDA/allocatable.cpp
@@ -39,7 +39,7 @@ int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, bool hasStat,
if (stat == StatOk) {
void *deviceAddr{
RTNAME(CUFGetDeviceAddress)((void *)&desc, sourceFile, sourceLine)};
- RTDECL(CUFDescriptorSync)(
+ RTNAME(CUFDescriptorSync)(
(Descriptor *)deviceAddr, &desc, sourceFile, sourceLine);
}
#endif
@@ -57,7 +57,7 @@ int RTDEF(CUFAllocatableDeallocate)(Descriptor &desc, bool hasStat,
if (stat == StatOk) {
void *deviceAddr{
RTNAME(CUFGetDeviceAddress)((void *)&desc, sourceFile, sourceLine)};
- RTDECL(CUFDescriptorSync)(
+ RTNAME(CUFDescriptorSync)(
(Descriptor *)deviceAddr, &desc, sourceFile, sourceLine);
}
#endif
>From 852ee9091fd0b10d7c7237aed024d587bbc2ba8a Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Wed, 18 Sep 2024 17:36:17 -0700
Subject: [PATCH 4/6] clang-format
---
flang/runtime/CUDA/allocatable.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/flang/runtime/CUDA/allocatable.cpp b/flang/runtime/CUDA/allocatable.cpp
index 3236a99bea3343..fde3454dc3c890 100644
--- a/flang/runtime/CUDA/allocatable.cpp
+++ b/flang/runtime/CUDA/allocatable.cpp
@@ -40,7 +40,7 @@ int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, bool hasStat,
void *deviceAddr{
RTNAME(CUFGetDeviceAddress)((void *)&desc, sourceFile, sourceLine)};
RTNAME(CUFDescriptorSync)(
- (Descriptor *)deviceAddr, &desc, sourceFile, sourceLine);
+ (Descriptor *)deviceAddr, &desc, sourceFile, sourceLine);
}
#endif
return stat;
@@ -58,7 +58,7 @@ int RTDEF(CUFAllocatableDeallocate)(Descriptor &desc, bool hasStat,
void *deviceAddr{
RTNAME(CUFGetDeviceAddress)((void *)&desc, sourceFile, sourceLine)};
RTNAME(CUFDescriptorSync)(
- (Descriptor *)deviceAddr, &desc, sourceFile, sourceLine);
+ (Descriptor *)deviceAddr, &desc, sourceFile, sourceLine);
}
#endif
return stat;
>From b390c4e1ce093676ab14edeee8f28b2c556167ef Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Wed, 18 Sep 2024 17:40:37 -0700
Subject: [PATCH 5/6] More clang-format
---
flang/runtime/CUDA/allocatable.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/flang/runtime/CUDA/allocatable.cpp b/flang/runtime/CUDA/allocatable.cpp
index fde3454dc3c890..cc0c647c6c9529 100644
--- a/flang/runtime/CUDA/allocatable.cpp
+++ b/flang/runtime/CUDA/allocatable.cpp
@@ -39,8 +39,8 @@ int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, bool hasStat,
if (stat == StatOk) {
void *deviceAddr{
RTNAME(CUFGetDeviceAddress)((void *)&desc, sourceFile, sourceLine)};
- RTNAME(CUFDescriptorSync)(
- (Descriptor *)deviceAddr, &desc, sourceFile, sourceLine);
+ RTNAME(CUFDescriptorSync)
+ ((Descriptor *)deviceAddr, &desc, sourceFile, sourceLine);
}
#endif
return stat;
@@ -57,8 +57,8 @@ int RTDEF(CUFAllocatableDeallocate)(Descriptor &desc, bool hasStat,
if (stat == StatOk) {
void *deviceAddr{
RTNAME(CUFGetDeviceAddress)((void *)&desc, sourceFile, sourceLine)};
- RTNAME(CUFDescriptorSync)(
- (Descriptor *)deviceAddr, &desc, sourceFile, sourceLine);
+ RTNAME(CUFDescriptorSync)
+ ((Descriptor *)deviceAddr, &desc, sourceFile, sourceLine);
}
#endif
return stat;
>From 5d913398ead48e3b7eb89ae39646f99ba209a672 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Wed, 18 Sep 2024 17:44:13 -0700
Subject: [PATCH 6/6] Fix comment
---
flang/include/flang/Runtime/CUDA/descriptor.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/flang/include/flang/Runtime/CUDA/descriptor.h b/flang/include/flang/Runtime/CUDA/descriptor.h
index 89e9dd2eeb5688..93791012fdcc73 100644
--- a/flang/include/flang/Runtime/CUDA/descriptor.h
+++ b/flang/include/flang/Runtime/CUDA/descriptor.h
@@ -25,7 +25,7 @@ Descriptor *RTDECL(CUFAllocDesciptor)(
void RTDECL(CUFFreeDesciptor)(
Descriptor *, const char *sourceFile = nullptr, int sourceLine = 0);
-/// Retrieve the device descriptor's pointer from the host one.
+/// Retrieve the device pointer from the host one.
void *RTDECL(CUFGetDeviceAddress)(
void *hostPtr, const char *sourceFile = nullptr, int sourceLine = 0);
More information about the flang-commits
mailing list