[libc-commits] [libc] 630037e - [libc] Partially implement 'rand' for the GPU (#66167)

via libc-commits libc-commits at lists.llvm.org
Thu Oct 19 14:01:47 PDT 2023


Author: Joseph Huber
Date: 2023-10-19T17:01:43-04:00
New Revision: 630037ede4cec8870764bed27c1289388ac33097

URL: https://github.com/llvm/llvm-project/commit/630037ede4cec8870764bed27c1289388ac33097
DIFF: https://github.com/llvm/llvm-project/commit/630037ede4cec8870764bed27c1289388ac33097.diff

LOG: [libc] Partially implement 'rand' for the GPU (#66167)

Summary:
This patch partially implements the `rand` function on the GPU. This is
partial because the GPU currently doesn't support thread local storage
or static initializers. To implement this on the GPU, I use 1/8th of the
local / shared memory quota to treat the shared memory as thread local
storage. This is done by simply allocating enough storage for each
thread in the block and indexing into this based off of the thread id.
The downside to this is that it does not initialize the seed correctly to
be `1` as the standard says, and it is also wasteful. In the future we
should figure out a way to support TLS on the GPU so that this can be
completely common and less resource intensive.

Added: 
    

Modified: 
    libc/config/gpu/entrypoints.txt
    libc/src/stdlib/rand.cpp
    libc/src/stdlib/rand_util.cpp
    libc/src/stdlib/rand_util.h
    libc/test/src/stdlib/rand_test.cpp

Removed: 
    


################################################################################
diff  --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt
index b20008e2784c412..756cddb0e8e5287 100644
--- a/libc/config/gpu/entrypoints.txt
+++ b/libc/config/gpu/entrypoints.txt
@@ -63,6 +63,8 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.stdlib.lldiv
     libc.src.stdlib.qsort
     libc.src.stdlib.qsort_r
+    libc.src.stdlib.rand
+    libc.src.stdlib.srand
     libc.src.stdlib.strtod
     libc.src.stdlib.strtof
     libc.src.stdlib.strtol

diff  --git a/libc/src/stdlib/rand.cpp b/libc/src/stdlib/rand.cpp
index c5737a8b8454775..ad543b4048a949e 100644
--- a/libc/src/stdlib/rand.cpp
+++ b/libc/src/stdlib/rand.cpp
@@ -15,10 +15,12 @@ namespace LIBC_NAMESPACE {
 // An implementation of the xorshift64star pseudo random number generator. This
 // is a good general purpose generator for most non-cryptographics applications.
 LLVM_LIBC_FUNCTION(int, rand, (void)) {
-  rand_next ^= rand_next >> 12;
-  rand_next ^= rand_next << 25;
-  rand_next ^= rand_next >> 27;
-  return static_cast<int>((rand_next * 0x2545F4914F6CDD1Dul) >> 32) & RAND_MAX;
+  unsigned long x = rand_next;
+  x ^= x >> 12;
+  x ^= x << 25;
+  x ^= x >> 27;
+  rand_next = x;
+  return static_cast<int>((x * 0x2545F4914F6CDD1Dul) >> 32) & RAND_MAX;
 }
 
 } // namespace LIBC_NAMESPACE

diff  --git a/libc/src/stdlib/rand_util.cpp b/libc/src/stdlib/rand_util.cpp
index fc2cd304e77d297..1f3dbce9c786088 100644
--- a/libc/src/stdlib/rand_util.cpp
+++ b/libc/src/stdlib/rand_util.cpp
@@ -11,8 +11,14 @@
 
 namespace LIBC_NAMESPACE {
 
+#ifdef LIBC_TARGET_ARCH_IS_GPU
+// FIXME: Local GPU memory cannot be initialized so we cannot currently provide
+// a standard compliant default value.
+ThreadLocal<unsigned long> rand_next;
+#else
 // C standard 7.10p2: If 'rand' is called before 'srand' it is to proceed as if
 // the 'srand' function was called with a value of '1'.
 LIBC_THREAD_LOCAL unsigned long rand_next = 1;
+#endif
 
 } // namespace LIBC_NAMESPACE

diff  --git a/libc/src/stdlib/rand_util.h b/libc/src/stdlib/rand_util.h
index 55c36a45bbc6b94..cadd6b5cdcbb8c0 100644
--- a/libc/src/stdlib/rand_util.h
+++ b/libc/src/stdlib/rand_util.h
@@ -9,11 +9,33 @@
 #ifndef LLVM_LIBC_SRC_STDLIB_RAND_UTIL_H
 #define LLVM_LIBC_SRC_STDLIB_RAND_UTIL_H
 
+#include "src/__support/GPU/utils.h"
 #include "src/__support/macros/attributes.h"
 
 namespace LIBC_NAMESPACE {
 
+#ifdef LIBC_TARGET_ARCH_IS_GPU
+// Implement thread local storage on the GPU using local memory. Each thread
+// gets its slot in the local memory array and is private to the group.
+// TODO: We need to implement the 'thread_local' keyword on the GPU. This is an
+// inefficient and incomplete stand-in until that is done.
+template <typename T> class ThreadLocal {
+private:
+  static constexpr long MAX_THREADS = 1024;
+  [[clang::loader_uninitialized]] static inline gpu::Local<T>
+      storage[MAX_THREADS];
+
+public:
+  LIBC_INLINE operator T() const { return storage[gpu::get_thread_id()]; }
+  LIBC_INLINE void operator=(const T &value) {
+    storage[gpu::get_thread_id()] = value;
+  }
+};
+
+extern ThreadLocal<unsigned long> rand_next;
+#else
 extern LIBC_THREAD_LOCAL unsigned long rand_next;
+#endif
 
 } // namespace LIBC_NAMESPACE
 

diff  --git a/libc/test/src/stdlib/rand_test.cpp b/libc/test/src/stdlib/rand_test.cpp
index 6f25708e539053d..7934dc16aa461a8 100644
--- a/libc/test/src/stdlib/rand_test.cpp
+++ b/libc/test/src/stdlib/rand_test.cpp
@@ -23,12 +23,15 @@ TEST(LlvmLibcRandTest, UnsetSeed) {
     vals[i] = val;
   }
 
+  // FIXME: The GPU implementation cannot initialize the seed correctly.
+#ifndef LIBC_TARGET_ARCH_IS_GPU
   // The C standard specifies that if 'srand' is never called it should behave
   // as if 'srand' was called with a value of 1. If we seed the value with 1 we
   // should get the same sequence as the unseeded version.
   LIBC_NAMESPACE::srand(1);
   for (size_t i = 0; i < 1000; ++i)
     ASSERT_EQ(LIBC_NAMESPACE::rand(), vals[i]);
+#endif
 }
 
 TEST(LlvmLibcRandTest, SetSeed) {


        


More information about the libc-commits mailing list