[libc-commits] [libc] 533145c - [libc] Support 'assert.h' on the GPU

Thu Aug 31 13:04:54 PDT 2023

Author: Joseph Huber
Date: 2023-08-31T15:04:43-05:00
New Revision: 533145c458fa022ff5b24dfa33e71539747f5c2c

URL: https://github.com/llvm/llvm-project/commit/533145c458fa022ff5b24dfa33e71539747f5c2c
DIFF: https://github.com/llvm/llvm-project/commit/533145c458fa022ff5b24dfa33e71539747f5c2c.diff

LOG: [libc] Support 'assert.h' on the GPU

This patch adds the necessary support to provide `assert` functionality
through the GPU `libc` implementation. This implementation creates a
special-case GPU implementation rather than relying on the common
version. This is because the GPU has special considerations for printing.
The assertion is printed out in chunks with `write_to_stderr`; however,
when combined with the GPU execution model this causes 32+ threads to
all execute in lock-step, meaning that we'll get a horribly fragmented
message. Furthermore, potentially thousands of threads could hit the
assertion at once and try to print even if we had it all in one
`printf`.

This is solved by having a one-time lock that each thread group / wave /
warp will attempt to claim. We only let one thread group pass through
while the others simply stop executing. Finally, only the first thread in
that group will do the printing before we abort execution.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D159296

Added: 
    libc/src/assert/generic/CMakeLists.txt
    libc/src/assert/generic/__assert_fail.cpp
    libc/src/assert/gpu/CMakeLists.txt
    libc/src/assert/gpu/__assert_fail.cpp

Modified: 
    libc/config/gpu/api.td
    libc/config/gpu/entrypoints.txt
    libc/config/gpu/headers.txt
    libc/docs/gpu/support.rst
    libc/src/__support/GPU/utils.h
    libc/src/__support/RPC/rpc.h
    libc/src/__support/RPC/rpc_util.h
    libc/src/assert/CMakeLists.txt

Removed: 
    libc/src/assert/__assert_fail.cpp


################################################################################
diff  --git a/libc/config/gpu/api.td b/libc/config/gpu/api.td
index 4435ded2aa45e4..f8db0742ac8292 100644

--- a/libc/config/gpu/api.td
+++ b/libc/config/gpu/api.td
@@ -3,6 +3,38 @@ include "config/public_api.td"
 include "spec/stdc.td"
 include "spec/posix.td"
 include "spec/gpu_ext.td"
+include "spec/gnu_ext.td"
+include "spec/llvm_libc_ext.td"
+
+def AssertMacro : MacroDef<"assert"> {
+  let Defn = [{
+    #undef assert
+
+    #ifdef NDEBUG
+    #define assert(e) (void)0
+    #else
+
+    #define assert(e)  \
+      ((e) ? (void)0 : __assert_fail(#e, __FILE__, __LINE__, __PRETTY_FUNCTION__))
+    #endif
+  }];
+}
+
+def StaticAssertMacro : MacroDef<"static_assert"> {
+  let Defn = [{
+    #ifndef __cplusplus
+    #undef static_assert
+    #define static_assert _Static_assert
+    #endif
+  }];
+}
+
+def AssertAPI : PublicAPI<"assert.h"> {
+  let Macros = [
+    AssertMacro,
+    StaticAssertMacro,
+  ];
+}
 
 def StringAPI : PublicAPI<"string.h"> {
   let Types = ["size_t"];

diff  --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt
index 1d3047ab3c146b..bf59ba6b0a3eaa 100644
--- a/libc/config/gpu/entrypoints.txt
+++ b/libc/config/gpu/entrypoints.txt
@@ -1,4 +1,7 @@
 set(TARGET_LIBC_ENTRYPOINTS
+    # assert.h entrypoints
+    libc.src.assert.__assert_fail
+
     # ctype.h entrypoints
     libc.src.ctype.isalnum
     libc.src.ctype.isalpha

diff  --git a/libc/config/gpu/headers.txt b/libc/config/gpu/headers.txt
index 3a8f091cda413a..dae01310fe9c31 100644
--- a/libc/config/gpu/headers.txt
+++ b/libc/config/gpu/headers.txt
@@ -1,4 +1,5 @@
 set(TARGET_PUBLIC_HEADERS
+    libc.include.assert
     libc.include.ctype
     libc.include.string
     libc.include.inttypes

diff  --git a/libc/docs/gpu/support.rst b/libc/docs/gpu/support.rst
index d1c27c7e8032d7..fbe69c66ca53b4 100644
--- a/libc/docs/gpu/support.rst
+++ b/libc/docs/gpu/support.rst
@@ -130,7 +130,7 @@ fopen          |check|    |check|
 fread          |check|    |check|
 =============  =========  ============
 
-stdio.h
+time.h
 --------
 
 =============  =========  ============
@@ -139,3 +139,13 @@ Function Name  Available  RPC Required
 clock          |check|
 nanosleep      |check|
 =============  =========  ============
+
+assert.h
+--------
+
+=============  =========  ============
+Function Name  Available  RPC Required
+=============  =========  ============
+assert         |check|    |check|
+__assert_fail  |check|    |check|
+=============  =========  ============

diff  --git a/libc/src/__support/GPU/utils.h b/libc/src/__support/GPU/utils.h
index f3277f42a32d20..07de9d72fbe62c 100644
--- a/libc/src/__support/GPU/utils.h
+++ b/libc/src/__support/GPU/utils.h
@@ -19,4 +19,19 @@
 #include "generic/utils.h"
 #endif
 
+namespace __llvm_libc {
+namespace gpu {
+/// Get the first active thread inside the lane.
+LIBC_INLINE uint64_t get_first_lane_id(uint64_t lane_mask) {
+  return __builtin_ffsl(lane_mask) - 1;
+}
+
+/// Conditional that is only true for a single thread in a lane.
+LIBC_INLINE bool is_first_lane(uint64_t lane_mask) {
+  return gpu::get_lane_id() == get_first_lane_id(lane_mask);
+}
+
+} // namespace gpu
+} // namespace __llvm_libc
+
 #endif // LLVM_LIBC_SRC_SUPPORT_OSUTIL_IO_H

diff  --git a/libc/src/__support/RPC/rpc.h b/libc/src/__support/RPC/rpc.h
index 49336fbc0332d3..fc95e5edf1c720 100644
--- a/libc/src/__support/RPC/rpc.h
+++ b/libc/src/__support/RPC/rpc.h
@@ -214,7 +214,7 @@ template <bool Invert, typename Packet> struct Process {
     // restrict to a single thread to avoid one thread dropping the lock, then
     // an unrelated warp claiming the lock, then a second thread in this warp
     // dropping the lock again.
-    clear_nth(lock, index, rpc::is_first_lane(lane_mask));
+    clear_nth(lock, index, gpu::is_first_lane(lane_mask));
     gpu::sync_lane(lane_mask);
   }
 
@@ -546,7 +546,7 @@ template <uint16_t opcode> LIBC_INLINE Client::Port Client::open() {
       continue;
     }
 
-    if (is_first_lane(lane_mask)) {
+    if (gpu::is_first_lane(lane_mask)) {
       process.packet[index].header.opcode = opcode;
       process.packet[index].header.mask = lane_mask;
     }

diff  --git a/libc/src/__support/RPC/rpc_util.h b/libc/src/__support/RPC/rpc_util.h
index 1e2c53880cb747..8f500369541de9 100644
--- a/libc/src/__support/RPC/rpc_util.h
+++ b/libc/src/__support/RPC/rpc_util.h
@@ -30,16 +30,6 @@ LIBC_INLINE void sleep_briefly() {
 #endif
 }
 
-/// Get the first active thread inside the lane.
-LIBC_INLINE uint64_t get_first_lane_id(uint64_t lane_mask) {
-  return __builtin_ffsl(lane_mask) - 1;
-}
-
-/// Conditional that is only true for a single thread in a lane.
-LIBC_INLINE bool is_first_lane(uint64_t lane_mask) {
-  return gpu::get_lane_id() == get_first_lane_id(lane_mask);
-}
-
 /// Conditional to indicate if this process is running on the GPU.
 LIBC_INLINE constexpr bool is_process_gpu() {
 #if defined(LIBC_TARGET_ARCH_IS_GPU)

diff  --git a/libc/src/assert/CMakeLists.txt b/libc/src/assert/CMakeLists.txt
index 57ff9e97aa2635..cb81e3b68b1dce 100644
--- a/libc/src/assert/CMakeLists.txt
+++ b/libc/src/assert/CMakeLists.txt
@@ -1,12 +1,18 @@
+if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS})
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS})
+else()
+  add_subdirectory(generic)
+endif()
+
+if(TARGET libc.src.assert.${LIBC_TARGET_OS}.__assert_fail)
+  set(assert_fail_dep libc.src.assert.${LIBC_TARGET_OS}.__assert_fail)
+else()
+  set(assert_fail_dep libc.src.assert.generic.__assert_fail)
+endif()
+
 add_entrypoint_object(
   __assert_fail
-  SRCS
-    __assert_fail.cpp
-  HDRS
-    __assert_fail.h
-    assert.h
+  ALIAS
   DEPENDS
-    libc.include.assert
-    libc.src.__support.OSUtil.osutil
-    libc.src.stdlib.abort
+    ${assert_fail_dep}
 )

diff  --git a/libc/src/assert/generic/CMakeLists.txt b/libc/src/assert/generic/CMakeLists.txt
new file mode 100644
index 00000000000000..387ab32be2741c
--- /dev/null
+++ b/libc/src/assert/generic/CMakeLists.txt
@@ -0,0 +1,12 @@
+add_entrypoint_object(
+  __assert_fail
+  SRCS
+    __assert_fail.cpp
+  HDRS
+    ../__assert_fail.h
+    ../assert.h
+  DEPENDS
+    libc.include.assert
+    libc.src.__support.OSUtil.osutil
+    libc.src.stdlib.abort
+)

diff  --git a/libc/src/assert/__assert_fail.cpp b/libc/src/assert/generic/__assert_fail.cpp
similarity index 100%
rename from libc/src/assert/__assert_fail.cpp
rename to libc/src/assert/generic/__assert_fail.cpp

diff  --git a/libc/src/assert/gpu/CMakeLists.txt b/libc/src/assert/gpu/CMakeLists.txt
new file mode 100644
index 00000000000000..3a4a0c7d10cfbe
--- /dev/null
+++ b/libc/src/assert/gpu/CMakeLists.txt
@@ -0,0 +1,14 @@
+add_entrypoint_object(
+  __assert_fail
+  SRCS
+    __assert_fail.cpp
+  HDRS
+    ../__assert_fail.h
+    ../assert.h
+  DEPENDS
+    libc.include.assert
+    libc.src.__support.OSUtil.osutil
+    libc.src.__support.GPU.utils
+    libc.src.__support.CPP.atomic
+    libc.src.stdlib.abort
+)

diff  --git a/libc/src/assert/gpu/__assert_fail.cpp b/libc/src/assert/gpu/__assert_fail.cpp
new file mode 100644
index 00000000000000..b8ee168b069d16
--- /dev/null
+++ b/libc/src/assert/gpu/__assert_fail.cpp
@@ -0,0 +1,45 @@
+//===-- GPU definition of a libc internal assert macro ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/assert/__assert_fail.h"
+
+#include "src/__support/CPP/atomic.h"
+#include "src/__support/GPU/utils.h"
+#include "src/__support/libc_assert.h"
+#include "src/stdlib/abort.h"
+
+namespace __llvm_libc {
+
+// A single-use lock to allow only a single thread to print the assertion.
+static cpp::Atomic<uint32_t> lock = 0;
+
+LLVM_LIBC_FUNCTION(void, __assert_fail,
+                   (const char *assertion, const char *file, unsigned line,
+                    const char *function)) {
+  uint64_t mask = gpu::get_lane_mask();
+  // We only want a single work group or warp to handle the assertion. Each
+  // group attempts to claim the lock, if it is already claimed we simply exit.
+  uint32_t claimed = gpu::is_first_lane(mask)
+                         ? !lock.fetch_or(1, cpp::MemoryOrder::ACQUIRE)
+                         : 0;
+  if (!gpu::broadcast_value(mask, claimed)) {
+#if defined(LIBC_TARGET_ARCH_IS_NVPTX)
+    LIBC_INLINE_ASM("exit;" ::: "memory");
+#elif defined(LIBC_TARGET_ARCH_IS_AMDGPU)
+    __builtin_amdgcn_endpgm();
+#endif
+    __builtin_unreachable();
+  }
+
+  // Only a single line should be printed if an assertion is hit.
+  if (gpu::is_first_lane(mask))
+    __llvm_libc::report_assertion_failure(assertion, file, line, function);
+  __llvm_libc::abort();
+}
+
+} // namespace __llvm_libc