[libc-commits] [libc] 8badfcc - [libc] Add Multithreaded GPU Benchmarks (#98964)

via libc-commits libc-commits at lists.llvm.org
Thu Jul 18 05:18:31 PDT 2024


Author: jameshu15869
Date: 2024-07-18T07:18:23-05:00
New Revision: 8badfccefeaff2c05ef71a8d2fd6d803a1b4e129

URL: https://github.com/llvm/llvm-project/commit/8badfccefeaff2c05ef71a8d2fd6d803a1b4e129
DIFF: https://github.com/llvm/llvm-project/commit/8badfccefeaff2c05ef71a8d2fd6d803a1b4e129.diff

LOG: [libc] Add Multithreaded GPU Benchmarks (#98964)

This PR runs benchmarks on a 32 threads (A single warp on NVPTX) by
default, adding the option for single threaded benchmarks. We can
specify that a benchmark should be run on a single thread using the
`SINGLE_THREADED_BENCHMARK()` macro.

I chose to use a flag here so that other options could be added in the
future.

Added: 
    

Modified: 
    libc/benchmarks/gpu/CMakeLists.txt
    libc/benchmarks/gpu/LibcGpuBenchmark.cpp
    libc/benchmarks/gpu/LibcGpuBenchmark.h
    libc/benchmarks/gpu/src/ctype/CMakeLists.txt
    libc/benchmarks/gpu/src/ctype/isalnum_benchmark.cpp

Removed: 
    


################################################################################
diff  --git a/libc/benchmarks/gpu/CMakeLists.txt b/libc/benchmarks/gpu/CMakeLists.txt
index 14ba9f3f64b48..29e27724e1ab3 100644
--- a/libc/benchmarks/gpu/CMakeLists.txt
+++ b/libc/benchmarks/gpu/CMakeLists.txt
@@ -10,6 +10,7 @@ function(add_benchmark benchmark_name)
     "LINK_LIBRARIES" # Multi-value arguments
     ${ARGN}
   )
+  
   if(NOT libc.src.time.clock IN_LIST TARGET_LLVMLIBC_ENTRYPOINTS)
     message(FATAL_ERROR "target does not support clock")
   endif()

diff  --git a/libc/benchmarks/gpu/LibcGpuBenchmark.cpp b/libc/benchmarks/gpu/LibcGpuBenchmark.cpp
index 23fff3e8180f7..c926d8efd7db2 100644
--- a/libc/benchmarks/gpu/LibcGpuBenchmark.cpp
+++ b/libc/benchmarks/gpu/LibcGpuBenchmark.cpp
@@ -114,8 +114,13 @@ void Benchmark::run_benchmarks() {
       all_results.reset();
 
     gpu::sync_threads();
-    auto current_result = b->run();
-    all_results.update(current_result);
+    if (!b->flags ||
+        ((b->flags & BenchmarkFlags::SINGLE_THREADED) && id == 0) ||
+        ((b->flags & BenchmarkFlags::SINGLE_WAVE) &&
+         id < gpu::get_lane_size())) {
+      auto current_result = b->run();
+      all_results.update(current_result);
+    }
     gpu::sync_threads();
 
     if (id == 0)

diff  --git a/libc/benchmarks/gpu/LibcGpuBenchmark.h b/libc/benchmarks/gpu/LibcGpuBenchmark.h
index 1f813f8655de6..29d7ba8b0a132 100644
--- a/libc/benchmarks/gpu/LibcGpuBenchmark.h
+++ b/libc/benchmarks/gpu/LibcGpuBenchmark.h
@@ -74,16 +74,19 @@ struct BenchmarkResult {
   clock_t total_time = 0;
 };
 
+enum BenchmarkFlags { SINGLE_THREADED = 0x1, SINGLE_WAVE = 0x2 };
+
 BenchmarkResult benchmark(const BenchmarkOptions &options,
                           cpp::function<uint64_t(void)> wrapper_func);
 
 class Benchmark {
   const cpp::function<uint64_t(void)> func;
   const cpp::string_view name;
+  const uint8_t flags;
 
 public:
-  Benchmark(cpp::function<uint64_t(void)> func, char const *name)
-      : func(func), name(name) {
+  Benchmark(cpp::function<uint64_t(void)> func, char const *name, uint8_t flags)
+      : func(func), name(name), flags(flags) {
     add_benchmark(this);
   }
 
@@ -104,6 +107,16 @@ class Benchmark {
 
 #define BENCHMARK(SuiteName, TestName, Func)                                   \
   LIBC_NAMESPACE::benchmarks::Benchmark SuiteName##_##TestName##_Instance(     \
-      Func, #SuiteName "." #TestName)
+      Func, #SuiteName "." #TestName, 0)
+
+#define SINGLE_THREADED_BENCHMARK(SuiteName, TestName, Func)                   \
+  LIBC_NAMESPACE::benchmarks::Benchmark SuiteName##_##TestName##_Instance(     \
+      Func, #SuiteName "." #TestName,                                          \
+      LIBC_NAMESPACE::benchmarks::BenchmarkFlags::SINGLE_THREADED)
+
+#define SINGLE_WAVE_BENCHMARK(SuiteName, TestName, Func)                       \
+  LIBC_NAMESPACE::benchmarks::Benchmark SuiteName##_##TestName##_Instance(     \
+      Func, #SuiteName "." #TestName,                                          \
+      LIBC_NAMESPACE::benchmarks::BenchmarkFlags::SINGLE_WAVE)
 
 #endif

diff  --git a/libc/benchmarks/gpu/src/ctype/CMakeLists.txt b/libc/benchmarks/gpu/src/ctype/CMakeLists.txt
index 79f01425770da..f277624dbb901 100644
--- a/libc/benchmarks/gpu/src/ctype/CMakeLists.txt
+++ b/libc/benchmarks/gpu/src/ctype/CMakeLists.txt
@@ -8,6 +8,8 @@ add_benchmark(
     isalnum_benchmark.cpp
   DEPENDS
     libc.src.ctype.isalnum
+  LOADER_ARGS
+    --threads 64
 )
 
 add_benchmark(

diff  --git a/libc/benchmarks/gpu/src/ctype/isalnum_benchmark.cpp b/libc/benchmarks/gpu/src/ctype/isalnum_benchmark.cpp
index 6f8d247902f76..ffa5a99860bfc 100644
--- a/libc/benchmarks/gpu/src/ctype/isalnum_benchmark.cpp
+++ b/libc/benchmarks/gpu/src/ctype/isalnum_benchmark.cpp
@@ -7,6 +7,10 @@ uint64_t BM_IsAlnum() {
   return LIBC_NAMESPACE::latency(LIBC_NAMESPACE::isalnum, x);
 }
 BENCHMARK(LlvmLibcIsAlNumGpuBenchmark, IsAlnum, BM_IsAlnum);
+SINGLE_THREADED_BENCHMARK(LlvmLibcIsAlNumGpuBenchmark, IsAlnumSingleThread,
+                          BM_IsAlnum);
+SINGLE_WAVE_BENCHMARK(LlvmLibcIsAlNumGpuBenchmark, IsAlnumSingleWave,
+                      BM_IsAlnum);
 
 uint64_t BM_IsAlnumCapital() {
   char x = 'A';


        


More information about the libc-commits mailing list