[libc-commits] [libc] cf5f311 - [libc] Polish GPU benchmarking (#153900)
via libc-commits
libc-commits at lists.llvm.org
Fri Aug 15 17:51:55 PDT 2025
Author: Leandro Lacerda
Date: 2025-08-15T19:51:52-05:00
New Revision: cf5f311b26d4db7296b28d52742c87f6a2836120
URL: https://github.com/llvm/llvm-project/commit/cf5f311b26d4db7296b28d52742c87f6a2836120
DIFF: https://github.com/llvm/llvm-project/commit/cf5f311b26d4db7296b28d52742c87f6a2836120.diff
LOG: [libc] Polish GPU benchmarking (#153900)
This patch provides cleanups and improvements for the GPU benchmarking
infrastructure. The key changes are:
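- Fix benchmark convergence bug: Round up the scaled iteration count
(ceil) to ensure it grows properly. The previous truncation logic caused
the iteration count to get stuck: for small counts, the scaled value could
truncate back to the same integer (e.g. a count of 1 scaled by any factor
below 2 truncates back to 1), so the count never increased.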
- Resolve the remaining compiler warning.
- Remove unused `BenchmarkLogger` files: This is dead code that added
maintenance and cognitive overhead without providing functionality.
- Improve build hygiene: Clean up headers and CMake dependencies to
strictly follow the 'include what you use' (IWYU) principle.
Added:
Modified:
libc/benchmarks/gpu/CMakeLists.txt
libc/benchmarks/gpu/LibcGpuBenchmark.cpp
libc/benchmarks/gpu/LibcGpuBenchmark.h
libc/benchmarks/gpu/timing/amdgpu/CMakeLists.txt
libc/benchmarks/gpu/timing/amdgpu/timing.h
libc/benchmarks/gpu/timing/nvptx/CMakeLists.txt
libc/benchmarks/gpu/timing/nvptx/timing.h
Removed:
libc/benchmarks/gpu/BenchmarkLogger.cpp
libc/benchmarks/gpu/BenchmarkLogger.h
################################################################################
diff --git a/libc/benchmarks/gpu/BenchmarkLogger.cpp b/libc/benchmarks/gpu/BenchmarkLogger.cpp
deleted file mode 100644
index d5996a74f6dd7..0000000000000
--- a/libc/benchmarks/gpu/BenchmarkLogger.cpp
+++ /dev/null
@@ -1,97 +0,0 @@
-#include "benchmarks/gpu/BenchmarkLogger.h"
-#include "hdr/stdint_proxy.h"
-#include "src/__support/CPP/string.h"
-#include "src/__support/CPP/string_view.h"
-#include "src/__support/OSUtil/io.h" // write_to_stderr
-#include "src/__support/big_int.h" // is_big_int
-#include "src/__support/macros/config.h"
-#include "src/__support/macros/properties/types.h" // LIBC_TYPES_HAS_INT128
-#include "src/__support/uint128.h"
-
-namespace LIBC_NAMESPACE_DECL {
-namespace benchmarks {
-
-// cpp::string_view specialization
-template <>
-BenchmarkLogger &
- BenchmarkLogger::operator<< <cpp::string_view>(cpp::string_view str) {
- LIBC_NAMESPACE::write_to_stderr(str);
- return *this;
-}
-
-// cpp::string specialization
-template <>
-BenchmarkLogger &BenchmarkLogger::operator<< <cpp::string>(cpp::string str) {
- return *this << static_cast<cpp::string_view>(str);
-}
-
-// const char* specialization
-template <>
-BenchmarkLogger &BenchmarkLogger::operator<< <const char *>(const char *str) {
- return *this << cpp::string_view(str);
-}
-
-// char* specialization
-template <> BenchmarkLogger &BenchmarkLogger::operator<< <char *>(char *str) {
- return *this << cpp::string_view(str);
-}
-
-// char specialization
-template <> BenchmarkLogger &BenchmarkLogger::operator<<(char ch) {
- return *this << cpp::string_view(&ch, 1);
-}
-
-// bool specialization
-template <> BenchmarkLogger &BenchmarkLogger::operator<<(bool cond) {
- return *this << (cond ? "true" : "false");
-}
-
-// void * specialization
-template <> BenchmarkLogger &BenchmarkLogger::operator<<(void *addr) {
- return *this << "0x" << cpp::to_string(reinterpret_cast<uintptr_t>(addr));
-}
-
-template <typename T> BenchmarkLogger &BenchmarkLogger::operator<<(T t) {
- if constexpr (is_big_int_v<T> ||
- (cpp::is_integral_v<T> && cpp::is_unsigned_v<T> &&
- (sizeof(T) > sizeof(uint64_t)))) {
- static_assert(sizeof(T) % 8 == 0, "Unsupported size of UInt");
- const IntegerToString<T, radix::Hex::WithPrefix> buffer(t);
- return *this << buffer.view();
- } else {
- return *this << cpp::to_string(t);
- }
-}
-
-// is_integral specializations
-// char is already specialized to handle character
-template BenchmarkLogger &BenchmarkLogger::operator<< <short>(short);
-template BenchmarkLogger &BenchmarkLogger::operator<< <int>(int);
-template BenchmarkLogger &BenchmarkLogger::operator<< <long>(long);
-template BenchmarkLogger &BenchmarkLogger::operator<< <long long>(long long);
-template BenchmarkLogger &
- BenchmarkLogger::operator<< <unsigned char>(unsigned char);
-template BenchmarkLogger &
- BenchmarkLogger::operator<< <unsigned short>(unsigned short);
-template BenchmarkLogger &
- BenchmarkLogger::operator<< <unsigned int>(unsigned int);
-template BenchmarkLogger &
- BenchmarkLogger::operator<< <unsigned long>(unsigned long);
-template BenchmarkLogger &
- BenchmarkLogger::operator<< <unsigned long long>(unsigned long long);
-
-#ifdef LIBC_TYPES_HAS_INT128
-template BenchmarkLogger &
- BenchmarkLogger::operator<< <__uint128_t>(__uint128_t);
-#endif // LIBC_TYPES_HAS_INT128
-template BenchmarkLogger &BenchmarkLogger::operator<< <UInt<128>>(UInt<128>);
-template BenchmarkLogger &BenchmarkLogger::operator<< <UInt<192>>(UInt<192>);
-template BenchmarkLogger &BenchmarkLogger::operator<< <UInt<256>>(UInt<256>);
-template BenchmarkLogger &BenchmarkLogger::operator<< <UInt<320>>(UInt<320>);
-
-// TODO: Add floating point formatting once it's supported by StringStream.
-
-BenchmarkLogger log;
-
-} // namespace benchmarks
-} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/benchmarks/gpu/BenchmarkLogger.h b/libc/benchmarks/gpu/BenchmarkLogger.h
deleted file mode 100644
index 2b22aba085f86..0000000000000
--- a/libc/benchmarks/gpu/BenchmarkLogger.h
+++ /dev/null
@@ -1,29 +0,0 @@
-//===-- Utilities to log to standard output during tests --------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIBC_BENCHMARKS_GPU_BENCHMARKLOGGER_H
-#define LLVM_LIBC_BENCHMARKS_GPU_BENCHMARKLOGGER_H
-
-#include "src/__support/macros/config.h"
-
-namespace LIBC_NAMESPACE_DECL {
-namespace benchmarks {
-
-// A class to log to standard output in the context of hermetic tests.
-struct BenchmarkLogger {
- constexpr BenchmarkLogger() = default;
- template <typename T> BenchmarkLogger &operator<<(T);
-};
-
-// A global TestLogger instance to be used in tests.
-extern BenchmarkLogger log;
-
-} // namespace benchmarks
-} // namespace LIBC_NAMESPACE_DECL
-
-#endif /* LLVM_LIBC_BENCHMARKS_GPU_BENCHMARKLOGGER_H */
diff --git a/libc/benchmarks/gpu/CMakeLists.txt b/libc/benchmarks/gpu/CMakeLists.txt
index beedac78d4826..6ca134b12a479 100644
--- a/libc/benchmarks/gpu/CMakeLists.txt
+++ b/libc/benchmarks/gpu/CMakeLists.txt
@@ -38,31 +38,25 @@ add_unittest_framework_library(
SRCS
LibcGpuBenchmark.cpp
LibcGpuBenchmarkMain.cpp
- BenchmarkLogger.cpp
HDRS
LibcGpuBenchmark.h
- BenchmarkLogger.h
DEPENDS
+ libc.benchmarks.gpu.timing.timing
libc.hdr.stdint_proxy
- libc.src.__support.big_int
- libc.src.__support.c_string
libc.src.__support.CPP.string
libc.src.__support.CPP.string_view
libc.src.__support.CPP.type_traits
- libc.src.__support.CPP.limits
libc.src.__support.CPP.algorithm
libc.src.__support.CPP.atomic
libc.src.__support.CPP.array
- libc.src.__support.fixed_point.fx_rep
- libc.src.__support.macros.properties.types
- libc.src.__support.OSUtil.osutil
- libc.src.__support.uint128
libc.src.__support.FPUtil.fp_bits
+ libc.src.__support.FPUtil.nearest_integer_operations
libc.src.__support.FPUtil.sqrt
libc.src.__support.fixedvector
- libc.src.time.clock
- libc.benchmarks.gpu.timing.timing
+ libc.src.__support.GPU.utils
+ libc.src.__support.time.gpu.time_utils
libc.src.stdio.printf
+ libc.src.time.clock
)
add_subdirectory(src)
diff --git a/libc/benchmarks/gpu/LibcGpuBenchmark.cpp b/libc/benchmarks/gpu/LibcGpuBenchmark.cpp
index ef816c51a87d7..a4a0ff4ec46e5 100644
--- a/libc/benchmarks/gpu/LibcGpuBenchmark.cpp
+++ b/libc/benchmarks/gpu/LibcGpuBenchmark.cpp
@@ -2,16 +2,17 @@
#include "hdr/stdint_proxy.h"
#include "src/__support/CPP/algorithm.h"
-#include "src/__support/CPP/array.h"
#include "src/__support/CPP/atomic.h"
#include "src/__support/CPP/string.h"
#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/NearestIntegerOperations.h"
#include "src/__support/FPUtil/sqrt.h"
#include "src/__support/GPU/utils.h"
#include "src/__support/fixedvector.h"
#include "src/__support/macros/config.h"
#include "src/__support/time/gpu/time_utils.h"
#include "src/stdio/printf.h"
+#include "src/time/clock.h"
namespace LIBC_NAMESPACE_DECL {
namespace benchmarks {
@@ -134,11 +135,13 @@ void print_results(Benchmark *b) {
cpp::atomic_thread_fence(cpp::MemoryOrder::RELEASE);
LIBC_NAMESPACE::printf(
- "%-24s |%15.0f |%9.0f |%8llu |%8llu |%11llu |%9u |\n",
+ "%-24s |%15.0f |%9.0f |%8llu |%8llu |%15llu |%9u |\n",
b->get_test_name().data(), final_result.cycles,
- final_result.standard_deviation, (unsigned long long)final_result.min,
- (unsigned long long)final_result.max,
- (unsigned long long)final_result.total_iterations, (unsigned)num_threads);
+ final_result.standard_deviation,
+ static_cast<unsigned long long>(final_result.min),
+ static_cast<unsigned long long>(final_result.max),
+ static_cast<unsigned long long>(final_result.total_iterations),
+ static_cast<unsigned>(num_threads));
}
void print_header() {
@@ -147,7 +150,7 @@ void print_header() {
benchmarks[0]->get_suite_name().data());
LIBC_NAMESPACE::printf("%s", RESET);
cpp::string titles = "Benchmark | Cycles (Mean) | Stddev | "
- " Min | Max | Iterations | Threads |\n";
+ " Min | Max | Iterations | Threads |\n";
LIBC_NAMESPACE::printf(titles.data());
cpp::string separator(titles.size(), '-');
@@ -226,7 +229,8 @@ BenchmarkResult benchmark(const BenchmarkOptions &options,
change_ratio < options.epsilon)
break;
- iterations = static_cast<uint32_t>(iterations * options.scaling_factor);
+ iterations = static_cast<uint32_t>(
+ fputil::ceil(iterations * options.scaling_factor));
}
const auto &estimator = rep.get_estimator();
diff --git a/libc/benchmarks/gpu/LibcGpuBenchmark.h b/libc/benchmarks/gpu/LibcGpuBenchmark.h
index 60f69edf86556..e36e93c7efc18 100644
--- a/libc/benchmarks/gpu/LibcGpuBenchmark.h
+++ b/libc/benchmarks/gpu/LibcGpuBenchmark.h
@@ -1,18 +1,16 @@
#ifndef LLVM_LIBC_BENCHMARKS_LIBC_GPU_BENCHMARK_H
#define LLVM_LIBC_BENCHMARKS_LIBC_GPU_BENCHMARK_H
-#include "benchmarks/gpu/BenchmarkLogger.h"
#include "benchmarks/gpu/timing/timing.h"
+
#include "hdr/stdint_proxy.h"
#include "src/__support/CPP/algorithm.h"
#include "src/__support/CPP/array.h"
-#include "src/__support/CPP/limits.h"
#include "src/__support/CPP/string_view.h"
#include "src/__support/CPP/type_traits.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/sqrt.h"
#include "src/__support/macros/config.h"
-#include "src/time/clock.h"
namespace LIBC_NAMESPACE_DECL {
diff --git a/libc/benchmarks/gpu/timing/amdgpu/CMakeLists.txt b/libc/benchmarks/gpu/timing/amdgpu/CMakeLists.txt
index d6a89d04dab97..f85152e69c346 100644
--- a/libc/benchmarks/gpu/timing/amdgpu/CMakeLists.txt
+++ b/libc/benchmarks/gpu/timing/amdgpu/CMakeLists.txt
@@ -4,10 +4,11 @@ add_header_library(
timing.h
DEPENDS
libc.hdr.stdint_proxy
- libc.src.__support.common
libc.src.__support.macros.config
libc.src.__support.macros.attributes
libc.src.__support.CPP.algorithm
libc.src.__support.CPP.array
+ libc.src.__support.CPP.atomic
libc.src.__support.CPP.type_traits
+ libc.src.__support.GPU.utils
)
diff --git a/libc/benchmarks/gpu/timing/amdgpu/timing.h b/libc/benchmarks/gpu/timing/amdgpu/timing.h
index de721a2d6ce6b..b4a174f729817 100644
--- a/libc/benchmarks/gpu/timing/amdgpu/timing.h
+++ b/libc/benchmarks/gpu/timing/amdgpu/timing.h
@@ -15,7 +15,6 @@
#include "src/__support/CPP/atomic.h"
#include "src/__support/CPP/type_traits.h"
#include "src/__support/GPU/utils.h"
-#include "src/__support/common.h"
#include "src/__support/macros/attributes.h"
#include "src/__support/macros/config.h"
diff --git a/libc/benchmarks/gpu/timing/nvptx/CMakeLists.txt b/libc/benchmarks/gpu/timing/nvptx/CMakeLists.txt
index 801080e7a6e98..4615f53e3d247 100644
--- a/libc/benchmarks/gpu/timing/nvptx/CMakeLists.txt
+++ b/libc/benchmarks/gpu/timing/nvptx/CMakeLists.txt
@@ -4,10 +4,11 @@ add_header_library(
timing.h
DEPENDS
libc.hdr.stdint_proxy
- libc.src.__support.common
libc.src.__support.macros.config
libc.src.__support.macros.attributes
libc.src.__support.CPP.algorithm
libc.src.__support.CPP.array
+ libc.src.__support.CPP.atomic
libc.src.__support.CPP.type_traits
+ libc.src.__support.GPU.utils
)
diff --git a/libc/benchmarks/gpu/timing/nvptx/timing.h b/libc/benchmarks/gpu/timing/nvptx/timing.h
index 133032ca08423..0c93a67129b8d 100644
--- a/libc/benchmarks/gpu/timing/nvptx/timing.h
+++ b/libc/benchmarks/gpu/timing/nvptx/timing.h
@@ -13,9 +13,7 @@
#include "src/__support/CPP/algorithm.h"
#include "src/__support/CPP/array.h"
#include "src/__support/CPP/atomic.h"
-#include "src/__support/CPP/type_traits.h"
#include "src/__support/GPU/utils.h"
-#include "src/__support/common.h"
#include "src/__support/macros/attributes.h"
#include "src/__support/macros/config.h"
@@ -66,7 +64,7 @@ template <typename F, typename T>
uint64_t stop = gpu::processor_clock();
cpp::atomic_thread_fence(cpp::MemoryOrder::ACQ_REL);
asm("" ::"r"(stop));
- volatile T output = result;
+ volatile auto output = result;
// Return the time elapsed.
return stop - start;
More information about the libc-commits
mailing list