[libc-commits] [libc] [libc] Implement 'atexit' on the GPU correctly (PR #83037)

Joseph Huber via libc-commits libc-commits at lists.llvm.org
Mon Feb 26 16:34:52 PST 2024


https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/83037

>From 8cc743bbcedc40e20207e3c164096bbc893b57d4 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Mon, 26 Feb 2024 11:15:48 -0600
Subject: [PATCH] [libc] Implement 'atexit' on the GPU correctly

Summary:
This function was never marked at supported because it was fundamentally
broken when called with multiple threads. The patch in
https://github.com/llvm/llvm-project/pull/83026 introduces a lock-free
stack that can be used to correctly handle enqueuing callbacks from
multiple threads. Although the previous interface tried to provide a
consistent API, this was not feasible with the needs for a lock-free
stack so I have elected to just use ifdefs. The size is fixed to
whatever we use for testing, which currently amounts to about 8KiB
dedicated for this thing, which isn't enough to be concenred about.

Depends on https://github.com/llvm/llvm-project/pull/83026
---
 libc/docs/gpu/support.rst              |  1 +
 libc/src/stdlib/CMakeLists.txt         | 37 +++++++++------
 libc/src/stdlib/atexit.cpp             | 10 +---
 libc/src/stdlib/gpu/CMakeLists.txt     | 13 ++++++
 libc/src/stdlib/gpu/atexit.cpp         | 63 ++++++++++++++++++++++++++
 libc/startup/gpu/amdgpu/CMakeLists.txt |  1 +
 libc/startup/gpu/nvptx/CMakeLists.txt  |  1 +
 7 files changed, 104 insertions(+), 22 deletions(-)
 create mode 100644 libc/src/stdlib/gpu/atexit.cpp

diff --git a/libc/docs/gpu/support.rst b/libc/docs/gpu/support.rst
index 250f0a7794de3c..b2231eb69a590a 100644
--- a/libc/docs/gpu/support.rst
+++ b/libc/docs/gpu/support.rst
@@ -102,6 +102,7 @@ atol           |check|
 atoll          |check|
 exit           |check|    |check|
 abort          |check|    |check|
+atexit         |check|
 labs           |check|
 llabs          |check|
 div            |check|
diff --git a/libc/src/stdlib/CMakeLists.txt b/libc/src/stdlib/CMakeLists.txt
index bd0bcffe0045d1..4fc979a5927ad4 100644
--- a/libc/src/stdlib/CMakeLists.txt
+++ b/libc/src/stdlib/CMakeLists.txt
@@ -363,20 +363,22 @@ add_entrypoint_object(
     libc.src.__support.OSUtil.osutil
 )
 
-add_entrypoint_object(
-  atexit
-  SRCS
-    atexit.cpp
-  HDRS
-    atexit.h
-  CXX_STANDARD
-    20 # For constinit of the atexit callback list.
-  DEPENDS
-    libc.src.__support.fixedvector
-    libc.src.__support.blockstore
-    libc.src.__support.threads.mutex
-    libc.src.__support.CPP.new
-)
+if(NOT LIBC_TARGET_OS_IS_GPU)
+  add_entrypoint_object(
+    atexit
+    SRCS
+      atexit.cpp
+    HDRS
+      atexit.h
+    CXX_STANDARD
+      20 # For constinit of the atexit callback list.
+    DEPENDS
+      libc.src.__support.fixedvector
+      libc.src.__support.blockstore
+      libc.src.__support.threads.mutex
+      libc.src.__support.CPP.new
+  )
+endif()
 
 add_entrypoint_object(
   exit
@@ -398,6 +400,13 @@ add_entrypoint_object(
 )
 
 if(LIBC_TARGET_OS_IS_GPU)
+  add_entrypoint_object(
+    atexit
+    ALIAS
+    DEPENDS
+      .${LIBC_TARGET_OS}.atexit
+  )
+
   add_entrypoint_object(
     malloc
     ALIAS
diff --git a/libc/src/stdlib/atexit.cpp b/libc/src/stdlib/atexit.cpp
index 1513b7969f0dbc..a502c88335b5df 100644
--- a/libc/src/stdlib/atexit.cpp
+++ b/libc/src/stdlib/atexit.cpp
@@ -9,6 +9,7 @@
 #include "src/stdlib/atexit.h"
 #include "src/__support/blockstore.h"
 #include "src/__support/common.h"
+#include "src/__support/fixedstack.h"
 #include "src/__support/fixedvector.h"
 #include "src/__support/threads/mutex.h"
 
@@ -28,14 +29,7 @@ struct AtExitUnit {
   constexpr AtExitUnit(AtExitCallback *c, void *p) : callback(c), payload(p) {}
 };
 
-#if defined(LIBC_TARGET_ARCH_IS_GPU)
-// The GPU build cannot handle the potentially recursive definitions required by
-// the BlockStore class. Additionally, the liklihood that someone exceeds this
-// while executing on the GPU is extremely small.
-// FIXME: It is not generally safe to use 'atexit' on the GPU because the
-//        mutexes simply passthrough. We will need a lock free stack.
-using ExitCallbackList = FixedVector<AtExitUnit, 64>;
-#elif defined(LIBC_COPT_PUBLIC_PACKAGING)
+#if defined(LIBC_COPT_PUBLIC_PACKAGING)
 using ExitCallbackList = cpp::ReverseOrderBlockStore<AtExitUnit, 32>;
 #else
 // BlockStore uses dynamic memory allocation. To avoid dynamic memory
diff --git a/libc/src/stdlib/gpu/CMakeLists.txt b/libc/src/stdlib/gpu/CMakeLists.txt
index 71ae10648d004e..f461e83fb4748b 100644
--- a/libc/src/stdlib/gpu/CMakeLists.txt
+++ b/libc/src/stdlib/gpu/CMakeLists.txt
@@ -30,3 +30,16 @@ add_entrypoint_object(
     libc.include.stdlib
     libc.src.__support.RPC.rpc_client
 )
+
+add_entrypoint_object(
+  atexit
+  SRCS
+    atexit.cpp
+  HDRS
+    ../atexit.h
+  CXX_STANDARD
+    20 # For constinit of the atexit callback list.
+  DEPENDS
+    libc.include.stdlib
+    libc.src.__support.fixedstack
+)
diff --git a/libc/src/stdlib/gpu/atexit.cpp b/libc/src/stdlib/gpu/atexit.cpp
new file mode 100644
index 00000000000000..aa87678adf8371
--- /dev/null
+++ b/libc/src/stdlib/gpu/atexit.cpp
@@ -0,0 +1,63 @@
+//===-- GPU implementation of atexit --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdlib/atexit.h"
+#include "src/__support/common.h"
+#include "src/__support/fixedstack.h"
+
+namespace LIBC_NAMESPACE {
+
+namespace {
+
+using AtExitCallback = void(void *);
+using StdCAtExitCallback = void(void);
+
+struct AtExitUnit {
+  AtExitCallback *callback = nullptr;
+  void *payload = nullptr;
+  constexpr AtExitUnit() = default;
+  constexpr AtExitUnit(AtExitCallback *c, void *p) : callback(c), payload(p) {}
+};
+
+// The GPU interface cannot use the standard implementation because it does not
+// support the Mutex type. Instead we use a lock free stack with a sufficiently
+// large size.
+constinit FixedStack<AtExitUnit, CALLBACK_LIST_SIZE_FOR_TESTS> exit_callbacks;
+
+void stdc_at_exit_func(void *payload) {
+  reinterpret_cast<StdCAtExitCallback *>(payload)();
+}
+
+} // namespace
+
+namespace internal {
+
+void call_exit_callbacks() {
+  AtExitUnit unit;
+  while (exit_callbacks.pop(unit))
+    unit.callback(unit.payload);
+}
+
+} // namespace internal
+
+static int add_atexit_unit(const AtExitUnit &unit) {
+  if (!exit_callbacks.push(unit))
+    return -1;
+  return 0;
+}
+
+extern "C" int __cxa_atexit(AtExitCallback *callback, void *payload, void *) {
+  return add_atexit_unit({callback, payload});
+}
+
+LLVM_LIBC_FUNCTION(int, atexit, (StdCAtExitCallback * callback)) {
+  return add_atexit_unit(
+      {&stdc_at_exit_func, reinterpret_cast<void *>(callback)});
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/startup/gpu/amdgpu/CMakeLists.txt b/libc/startup/gpu/amdgpu/CMakeLists.txt
index 3ac104ee8ba94a..d453d9aa7a0b94 100644
--- a/libc/startup/gpu/amdgpu/CMakeLists.txt
+++ b/libc/startup/gpu/amdgpu/CMakeLists.txt
@@ -5,6 +5,7 @@ add_startup_object(
   DEPENDS
     libc.src.__support.RPC.rpc_client
     libc.src.__support.GPU.utils
+    libc.src.__support.CPP.new
     libc.src.stdlib.exit
     libc.src.stdlib.atexit
   COMPILE_OPTIONS
diff --git a/libc/startup/gpu/nvptx/CMakeLists.txt b/libc/startup/gpu/nvptx/CMakeLists.txt
index 3ac104ee8ba94a..d453d9aa7a0b94 100644
--- a/libc/startup/gpu/nvptx/CMakeLists.txt
+++ b/libc/startup/gpu/nvptx/CMakeLists.txt
@@ -5,6 +5,7 @@ add_startup_object(
   DEPENDS
     libc.src.__support.RPC.rpc_client
     libc.src.__support.GPU.utils
+    libc.src.__support.CPP.new
     libc.src.stdlib.exit
     libc.src.stdlib.atexit
   COMPILE_OPTIONS



More information about the libc-commits mailing list