[libc-commits] [libc] 86860be - [libc] Make 'rand()' thread-safe using atomics instead of TLS (#96692)

via libc-commits libc-commits at lists.llvm.org
Wed Jun 26 05:03:31 PDT 2024


Author: Joseph Huber
Date: 2024-06-26T07:03:28-05:00
New Revision: 86860be2886283210083e5e3f20048e559cc059e

URL: https://github.com/llvm/llvm-project/commit/86860be2886283210083e5e3f20048e559cc059e
DIFF: https://github.com/llvm/llvm-project/commit/86860be2886283210083e5e3f20048e559cc059e.diff

LOG: [libc] Make 'rand()' thread-safe using atomics instead of TLS (#96692)

Summary:
Currently, we implement the `rand` function using thread-local storage.
This is somewhat problematic because not every target supports TLS, and
even more do not support non-zero initializers on TLS.

The C standard states that the `rand()` function need not be thread,
safe. However, many implementations provide thread-safety anyway.
There's some confusing language in the 'rationale' section of
https://pubs.opengroup.org/onlinepubs/9699919799/functions/rand.html,
but given that `glibc` uses a lock, I think we should make this thread
safe as well. it mentions that threaded behavior is desirable and can be
done in the two ways:

1. A single per-process sequence of pseudo-random numbers that is shared
by all threads that call rand()
2. A different sequence of pseudo-random numbers for each thread that
calls rand()

The current implementation is (2.) and this patch moves it to (1.). This
is beneficial for the GPU case and more generic support. The downside is
that it's slightly slower to do these atomic operations, the fast path
will be two atomic reads and an atomic write.

Added: 
    

Modified: 
    libc/src/stdlib/CMakeLists.txt
    libc/src/stdlib/rand.cpp
    libc/src/stdlib/rand_util.cpp
    libc/src/stdlib/rand_util.h
    libc/src/stdlib/srand.cpp
    libc/test/src/stdlib/rand_test.cpp

Removed: 
    


################################################################################
diff  --git a/libc/src/stdlib/CMakeLists.txt b/libc/src/stdlib/CMakeLists.txt
index 61c05f0738959..677bf358c82c4 100644
--- a/libc/src/stdlib/CMakeLists.txt
+++ b/libc/src/stdlib/CMakeLists.txt
@@ -304,6 +304,7 @@ add_entrypoint_object(
   DEPENDS
     .rand_util
     libc.include.stdlib
+    libc.src.__support.threads.sleep
 )
 
 add_entrypoint_object(

diff  --git a/libc/src/stdlib/rand.cpp b/libc/src/stdlib/rand.cpp
index ad543b4048a94..ff3875c2f6959 100644
--- a/libc/src/stdlib/rand.cpp
+++ b/libc/src/stdlib/rand.cpp
@@ -8,6 +8,7 @@
 
 #include "src/stdlib/rand.h"
 #include "src/__support/common.h"
+#include "src/__support/threads/sleep.h"
 #include "src/stdlib/rand_util.h"
 
 namespace LIBC_NAMESPACE {
@@ -15,12 +16,17 @@ namespace LIBC_NAMESPACE {
 // An implementation of the xorshift64star pseudo random number generator. This
 // is a good general purpose generator for most non-cryptographics applications.
 LLVM_LIBC_FUNCTION(int, rand, (void)) {
-  unsigned long x = rand_next;
-  x ^= x >> 12;
-  x ^= x << 25;
-  x ^= x >> 27;
-  rand_next = x;
-  return static_cast<int>((x * 0x2545F4914F6CDD1Dul) >> 32) & RAND_MAX;
+  unsigned long orig = rand_next.load(cpp::MemoryOrder::RELAXED);
+  for (;;) {
+    unsigned long x = orig;
+    x ^= x >> 12;
+    x ^= x << 25;
+    x ^= x >> 27;
+    if (rand_next.compare_exchange_strong(orig, x, cpp::MemoryOrder::ACQUIRE,
+                                          cpp::MemoryOrder::RELAXED))
+      return static_cast<int>((x * 0x2545F4914F6CDD1Dul) >> 32) & RAND_MAX;
+    sleep_briefly();
+  }
 }
 
 } // namespace LIBC_NAMESPACE

diff  --git a/libc/src/stdlib/rand_util.cpp b/libc/src/stdlib/rand_util.cpp
index 1f3dbce9c7860..81ee358935716 100644
--- a/libc/src/stdlib/rand_util.cpp
+++ b/libc/src/stdlib/rand_util.cpp
@@ -7,18 +7,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/stdlib/rand_util.h"
+#include "src/__support/CPP/atomic.h"
 #include "src/__support/macros/attributes.h"
 
 namespace LIBC_NAMESPACE {
 
-#ifdef LIBC_TARGET_ARCH_IS_GPU
-// FIXME: Local GPU memory cannot be initialized so we cannot currently provide
-// a standard compliant default value.
-ThreadLocal<unsigned long> rand_next;
-#else
-// C standard 7.10p2: If 'rand' is called before 'srand' it is to proceed as if
-// the 'srand' function was called with a value of '1'.
-LIBC_THREAD_LOCAL unsigned long rand_next = 1;
-#endif
+//  C standard 7.10p2: If 'rand' is called before 'srand' it is to
+// proceed as if the 'srand' function was called with a value of '1'.
+cpp::Atomic<unsigned long> rand_next = 1;
 
 } // namespace LIBC_NAMESPACE

diff  --git a/libc/src/stdlib/rand_util.h b/libc/src/stdlib/rand_util.h
index cadd6b5cdcbb8..5d7febf8248d8 100644
--- a/libc/src/stdlib/rand_util.h
+++ b/libc/src/stdlib/rand_util.h
@@ -9,33 +9,15 @@
 #ifndef LLVM_LIBC_SRC_STDLIB_RAND_UTIL_H
 #define LLVM_LIBC_SRC_STDLIB_RAND_UTIL_H
 
-#include "src/__support/GPU/utils.h"
+#include "src/__support/CPP/atomic.h"
 #include "src/__support/macros/attributes.h"
 
 namespace LIBC_NAMESPACE {
 
-#ifdef LIBC_TARGET_ARCH_IS_GPU
-// Implement thread local storage on the GPU using local memory. Each thread
-// gets its slot in the local memory array and is private to the group.
-// TODO: We need to implement the 'thread_local' keyword on the GPU. This is an
-// inefficient and incomplete stand-in until that is done.
-template <typename T> class ThreadLocal {
-private:
-  static constexpr long MAX_THREADS = 1024;
-  [[clang::loader_uninitialized]] static inline gpu::Local<T>
-      storage[MAX_THREADS];
-
-public:
-  LIBC_INLINE operator T() const { return storage[gpu::get_thread_id()]; }
-  LIBC_INLINE void operator=(const T &value) {
-    storage[gpu::get_thread_id()] = value;
-  }
-};
-
-extern ThreadLocal<unsigned long> rand_next;
-#else
-extern LIBC_THREAD_LOCAL unsigned long rand_next;
-#endif
+// The ISO C standard does not explicitly require thread-safe behavior for the
+// generic `rand()` function. Some implementations expect it however, so we
+// provide it here.
+extern cpp::Atomic<unsigned long> rand_next;
 
 } // namespace LIBC_NAMESPACE
 

diff  --git a/libc/src/stdlib/srand.cpp b/libc/src/stdlib/srand.cpp
index 008c7a9e565e4..21166c7a6754e 100644
--- a/libc/src/stdlib/srand.cpp
+++ b/libc/src/stdlib/srand.cpp
@@ -12,6 +12,8 @@
 
 namespace LIBC_NAMESPACE {
 
-LLVM_LIBC_FUNCTION(void, srand, (unsigned int seed)) { rand_next = seed; }
+LLVM_LIBC_FUNCTION(void, srand, (unsigned int seed)) {
+  rand_next.store(seed, cpp::MemoryOrder::RELAXED);
+}
 
 } // namespace LIBC_NAMESPACE

diff  --git a/libc/test/src/stdlib/rand_test.cpp b/libc/test/src/stdlib/rand_test.cpp
index 7934dc16aa461..6f25708e53905 100644
--- a/libc/test/src/stdlib/rand_test.cpp
+++ b/libc/test/src/stdlib/rand_test.cpp
@@ -23,15 +23,12 @@ TEST(LlvmLibcRandTest, UnsetSeed) {
     vals[i] = val;
   }
 
-  // FIXME: The GPU implementation cannot initialize the seed correctly.
-#ifndef LIBC_TARGET_ARCH_IS_GPU
   // The C standard specifies that if 'srand' is never called it should behave
   // as if 'srand' was called with a value of 1. If we seed the value with 1 we
   // should get the same sequence as the unseeded version.
   LIBC_NAMESPACE::srand(1);
   for (size_t i = 0; i < 1000; ++i)
     ASSERT_EQ(LIBC_NAMESPACE::rand(), vals[i]);
-#endif
 }
 
 TEST(LlvmLibcRandTest, SetSeed) {


        


More information about the libc-commits mailing list