[libc-commits] [libc] [libc] Run Benchmarks on 32 Threads by Default (PR #98964)

via libc-commits libc-commits at lists.llvm.org
Mon Jul 15 21:57:41 PDT 2024


https://github.com/jameshu15869 updated https://github.com/llvm/llvm-project/pull/98964

>From 25bcbd1765f6c22a8a395a47e42b461fa67eb49e Mon Sep 17 00:00:00 2001
From: jameshu15869 <jhudson15869 at gmail.com>
Date: Sun, 14 Jul 2024 18:35:54 -0400
Subject: [PATCH 1/2] run benchmarks on warps by default, adding the option for
 single threaded benchmarks

---
 libc/benchmarks/gpu/CMakeLists.txt                 |  6 ++++++
 libc/benchmarks/gpu/LibcGpuBenchmark.cpp           |  6 ++++--
 libc/benchmarks/gpu/LibcGpuBenchmark.h             | 14 +++++++++++---
 .../benchmarks/gpu/src/ctype/isalnum_benchmark.cpp |  2 ++
 4 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/libc/benchmarks/gpu/CMakeLists.txt b/libc/benchmarks/gpu/CMakeLists.txt
index eaeecbdacd23e..8c409bc6ef3ea 100644
--- a/libc/benchmarks/gpu/CMakeLists.txt
+++ b/libc/benchmarks/gpu/CMakeLists.txt
@@ -10,6 +10,10 @@ function(add_benchmark benchmark_name)
     "LINK_LIBRARIES" # Multi-value arguments
     ${ARGN}
   )
+  # We run benchmarks for a single warp with and give the 
+  # option to run only a single thread
+  set(BENCHMARK_NUM_THREADS 32)
+
   if(NOT libc.src.time.clock IN_LIST TARGET_LLVMLIBC_ENTRYPOINTS)
     message(FATAL_ERROR "target does not support clock")
   endif()
@@ -19,6 +23,8 @@ function(add_benchmark benchmark_name)
     LINK_LIBRARIES
       LibcGpuBenchmark.hermetic
       ${BENCHMARK_LINK_LIBRARIES}
+    LOADER_ARGS
+      --threads ${BENCHMARK_NUM_THREADS}
     ${BENCHMARK_UNPARSED_ARGUMENTS}
   )
   get_fq_target_name(${benchmark_name} fq_target_name)
diff --git a/libc/benchmarks/gpu/LibcGpuBenchmark.cpp b/libc/benchmarks/gpu/LibcGpuBenchmark.cpp
index 23fff3e8180f7..2094d33e1e9e7 100644
--- a/libc/benchmarks/gpu/LibcGpuBenchmark.cpp
+++ b/libc/benchmarks/gpu/LibcGpuBenchmark.cpp
@@ -114,8 +114,10 @@ void Benchmark::run_benchmarks() {
       all_results.reset();
 
     gpu::sync_threads();
-    auto current_result = b->run();
-    all_results.update(current_result);
+    if (!(b->flags & BenchmarkFlags::SINGLE_THREADED) || id == 0) {
+      auto current_result = b->run();
+      all_results.update(current_result);
+    }
     gpu::sync_threads();
 
     if (id == 0)
diff --git a/libc/benchmarks/gpu/LibcGpuBenchmark.h b/libc/benchmarks/gpu/LibcGpuBenchmark.h
index 1f813f8655de6..53f35768e1bf1 100644
--- a/libc/benchmarks/gpu/LibcGpuBenchmark.h
+++ b/libc/benchmarks/gpu/LibcGpuBenchmark.h
@@ -74,16 +74,19 @@ struct BenchmarkResult {
   clock_t total_time = 0;
 };
 
+enum BenchmarkFlags { SINGLE_THREADED = 0x1 };
+
 BenchmarkResult benchmark(const BenchmarkOptions &options,
                           cpp::function<uint64_t(void)> wrapper_func);
 
 class Benchmark {
   const cpp::function<uint64_t(void)> func;
   const cpp::string_view name;
+  const uint8_t flags;
 
 public:
-  Benchmark(cpp::function<uint64_t(void)> func, char const *name)
-      : func(func), name(name) {
+  Benchmark(cpp::function<uint64_t(void)> func, char const *name, uint8_t flags)
+      : func(func), name(name), flags(flags) {
     add_benchmark(this);
   }
 
@@ -104,6 +107,11 @@ class Benchmark {
 
 #define BENCHMARK(SuiteName, TestName, Func)                                   \
   LIBC_NAMESPACE::benchmarks::Benchmark SuiteName##_##TestName##_Instance(     \
-      Func, #SuiteName "." #TestName)
+      Func, #SuiteName "." #TestName, 0)
+
+#define SINGLE_THREADED_BENCHMARK(SuiteName, TestName, Func)                   \
+  LIBC_NAMESPACE::benchmarks::Benchmark SuiteName##_##TestName##_Instance(     \
+      Func, #SuiteName "." #TestName,                                          \
+      LIBC_NAMESPACE::benchmarks::BenchmarkFlags::SINGLE_THREADED)
 
 #endif
diff --git a/libc/benchmarks/gpu/src/ctype/isalnum_benchmark.cpp b/libc/benchmarks/gpu/src/ctype/isalnum_benchmark.cpp
index 6f8d247902f76..d9c1a804ec506 100644
--- a/libc/benchmarks/gpu/src/ctype/isalnum_benchmark.cpp
+++ b/libc/benchmarks/gpu/src/ctype/isalnum_benchmark.cpp
@@ -7,6 +7,8 @@ uint64_t BM_IsAlnum() {
   return LIBC_NAMESPACE::latency(LIBC_NAMESPACE::isalnum, x);
 }
 BENCHMARK(LlvmLibcIsAlNumGpuBenchmark, IsAlnum, BM_IsAlnum);
+SINGLE_THREADED_BENCHMARK(LlvmLibcIsAlNumGpuBenchmark, IsAlnumSingleThread,
+                          BM_IsAlnum);
 
 uint64_t BM_IsAlnumCapital() {
   char x = 'A';

>From b96f564eed7964005125af7e54f51f34445cbe2f Mon Sep 17 00:00:00 2001
From: jameshu15869 <jhudson15869 at gmail.com>
Date: Tue, 16 Jul 2024 00:56:10 -0400
Subject: [PATCH 2/2] specify threads when registering benchmarks in cmake

---
 libc/benchmarks/gpu/CMakeLists.txt           | 9 +++++----
 libc/benchmarks/gpu/src/ctype/CMakeLists.txt | 4 ++++
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/libc/benchmarks/gpu/CMakeLists.txt b/libc/benchmarks/gpu/CMakeLists.txt
index 8c409bc6ef3ea..8458842b77fc1 100644
--- a/libc/benchmarks/gpu/CMakeLists.txt
+++ b/libc/benchmarks/gpu/CMakeLists.txt
@@ -6,13 +6,14 @@ function(add_benchmark benchmark_name)
   cmake_parse_arguments(
     "BENCHMARK"
     "" # Optional arguments
-    "" # Single value arguments
+    "NUM_THREADS" # Single value arguments
     "LINK_LIBRARIES" # Multi-value arguments
     ${ARGN}
   )
-  # We run benchmarks for a single warp with and give the 
-  # option to run only a single thread
-  set(BENCHMARK_NUM_THREADS 32)
+
+  if(NOT BENCHMARK_NUM_THREADS) # Test by name: `${...}` expands to nothing when unset, leaving a bare `if(NOT)`.
+    set(BENCHMARK_NUM_THREADS 1) # Fall back to one thread when the caller omits NUM_THREADS.
+  endif()
 
   if(NOT libc.src.time.clock IN_LIST TARGET_LLVMLIBC_ENTRYPOINTS)
     message(FATAL_ERROR "target does not support clock")
diff --git a/libc/benchmarks/gpu/src/ctype/CMakeLists.txt b/libc/benchmarks/gpu/src/ctype/CMakeLists.txt
index 79f01425770da..f41e5c94c5060 100644
--- a/libc/benchmarks/gpu/src/ctype/CMakeLists.txt
+++ b/libc/benchmarks/gpu/src/ctype/CMakeLists.txt
@@ -8,6 +8,8 @@ add_benchmark(
     isalnum_benchmark.cpp
   DEPENDS
     libc.src.ctype.isalnum
+  NUM_THREADS
+    32
 )
 
 add_benchmark(
@@ -18,4 +20,6 @@ add_benchmark(
     isalpha_benchmark.cpp
   DEPENDS
     libc.src.ctype.isalpha
+  NUM_THREADS
+    32
 )



More information about the libc-commits mailing list