[libc-commits] [libc] [libc][math] Optimize misc basic math operations with builtins when available (PR #99037)

Tue Jul 16 06:24:19 PDT 2024

llvmbot wrote:




@llvm/pr-subscribers-libc

Author: OverMighty (overmighty)

<details>
<summary>Changes</summary>



---

The patch is 26.77 KiB and has been truncated to 20.00 KiB below; the full version is available at https://github.com/llvm/llvm-project/pull/99037.diff


21 Files Affected:

- (modified) libc/cmake/modules/CheckCompilerFeatures.cmake (+3) 
- (modified) libc/cmake/modules/LLVMLibCCompileOptionRules.cmake (+7) 
- (modified) libc/cmake/modules/LLVMLibCFlagRules.cmake (+3) 
- (added) libc/cmake/modules/compiler_features/check_builtin_fmax_fmin.cpp (+14) 
- (modified) libc/src/math/generic/CMakeLists.txt (+24) 
- (modified) libc/src/math/generic/copysign.cpp (+4) 
- (modified) libc/src/math/generic/copysignf.cpp (+4) 
- (modified) libc/src/math/generic/copysignf16.cpp (+4) 
- (modified) libc/src/math/generic/fabs.cpp (+7-1) 
- (modified) libc/src/math/generic/fabsf.cpp (+7-1) 
- (modified) libc/src/math/generic/fabsf16.cpp (+7-1) 
- (modified) libc/src/math/generic/fmaximum_num.cpp (+5-1) 
- (modified) libc/src/math/generic/fmaximum_numf.cpp (+5-1) 
- (modified) libc/src/math/generic/fmaximum_numf16.cpp (+4) 
- (modified) libc/src/math/generic/fminimum_num.cpp (+5-1) 
- (modified) libc/src/math/generic/fminimum_numf.cpp (+5-1) 
- (modified) libc/src/math/generic/fminimum_numf16.cpp (+4) 
- (modified) libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h (+4-3) 
- (modified) libc/test/src/math/performance_testing/CMakeLists.txt (+20) 
- (modified) libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h (+29-15) 
- (added) libc/test/src/math/performance_testing/misc_basic_ops_perf.cpp (+153) 


``````````diff

diff --git a/libc/cmake/modules/CheckCompilerFeatures.cmake b/libc/cmake/modules/CheckCompilerFeatures.cmake
index a6d793d495c45..2bab968f901eb 100644
--- a/libc/cmake/modules/CheckCompilerFeatures.cmake
+++ b/libc/cmake/modules/CheckCompilerFeatures.cmake
@@ -5,6 +5,7 @@
 set(
   ALL_COMPILER_FEATURES
     "builtin_ceil_floor_rint_trunc"
+    "builtin_fmax_fmin"
     "builtin_round"
     "builtin_roundeven"
     "float16"
@@ -82,6 +83,8 @@ foreach(feature IN LISTS ALL_COMPILER_FEATURES)
       set(LIBC_COMPILER_HAS_FIXED_POINT TRUE)
     elseif(${feature} STREQUAL "builtin_ceil_floor_rint_trunc")
       set(LIBC_COMPILER_HAS_BUILTIN_CEIL_FLOOR_RINT_TRUNC TRUE)
+    elseif(${feature} STREQUAL "builtin_fmax_fmin")
+      set(LIBC_COMPILER_HAS_BUILTIN_FMAX_FMIN TRUE)
     elseif(${feature} STREQUAL "builtin_round")
       set(LIBC_COMPILER_HAS_BUILTIN_ROUND TRUE)
     elseif(${feature} STREQUAL "builtin_roundeven")
diff --git a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
index 253da4ae890e5..ead578f95ac72 100644
--- a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
+++ b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
@@ -6,6 +6,7 @@ function(_get_compile_options_from_flags output_var)
   endif()
   check_flag(ADD_ROUND_OPT_FLAG ${ROUND_OPT_FLAG} ${ARGN})
   check_flag(ADD_EXPLICIT_SIMD_OPT_FLAG ${EXPLICIT_SIMD_OPT_FLAG} ${ARGN})
+  check_flag(ADD_MISC_MATH_BASIC_OPS_OPT_FLAG ${MISC_MATH_BASIC_OPS_OPT_FLAG} ${ARGN})
 
   if(LLVM_COMPILER_IS_GCC_COMPATIBLE)
     if(ADD_FMA_FLAG)
@@ -37,6 +38,12 @@ function(_get_compile_options_from_flags output_var)
     if(ADD_EXPLICIT_SIMD_OPT_FLAG)
       list(APPEND compile_options "-D__LIBC_EXPLICIT_SIMD_OPT")
     endif()
+    if(ADD_MISC_MATH_BASIC_OPS_OPT_FLAG)
+      list(APPEND compile_options "-D__LIBC_MISC_MATH_BASIC_OPS_OPT")
+      if(LIBC_COMPILER_HAS_BUILTIN_FMAX_FMIN)
+        list(APPEND compile_options "-D__LIBC_USE_BUILTIN_FMAX_FMIN")
+      endif()
+    endif()
   elseif(MSVC)
     if(ADD_FMA_FLAG)
       list(APPEND compile_options "/arch:AVX2")
diff --git a/libc/cmake/modules/LLVMLibCFlagRules.cmake b/libc/cmake/modules/LLVMLibCFlagRules.cmake
index eca7ba8d183e6..4398fe55db5aa 100644
--- a/libc/cmake/modules/LLVMLibCFlagRules.cmake
+++ b/libc/cmake/modules/LLVMLibCFlagRules.cmake
@@ -263,6 +263,9 @@ set(FMA_OPT_FLAG "FMA_OPT")
 set(ROUND_OPT_FLAG "ROUND_OPT")
 # This flag controls whether we use explicit SIMD instructions or not.
 set(EXPLICIT_SIMD_OPT_FLAG "EXPLICIT_SIMD_OPT")
+# This flag controls whether we use compiler builtin functions to implement
+# various basic math operations or not.
+set(MISC_MATH_BASIC_OPS_OPT_FLAG "MISC_MATH_BASIC_OPS_OPT")
 
 # Skip FMA_OPT flag for targets that don't support fma.
 if(NOT((LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "FMA")) OR
diff --git a/libc/cmake/modules/compiler_features/check_builtin_fmax_fmin.cpp b/libc/cmake/modules/compiler_features/check_builtin_fmax_fmin.cpp
new file mode 100644
index 0000000000000..a962df33e31c4
--- /dev/null
+++ b/libc/cmake/modules/compiler_features/check_builtin_fmax_fmin.cpp
@@ -0,0 +1,14 @@
+_Float16 try_builtin_fmaxf16(_Float16 x, _Float16 y) {
+  return __builtin_fmaxf16(x, y);
+}
+_Float16 try_builtin_fminf16(_Float16 x, _Float16 y) {
+  return __builtin_fminf16(x, y);
+}
+
+float try_builtin_fmaxf(float x, float y) { return __builtin_fmaxf(x, y); }
+float try_builtin_fminf(float x, float y) { return __builtin_fminf(x, y); }
+
+double try_builtin_fmax(double x, double y) { return __builtin_fmax(x, y); }
+double try_builtin_fmin(double x, double y) { return __builtin_fmin(x, y); }
+
+extern "C" void _start() {}
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index c2f58fb1a4f71..3d713368251f6 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -400,6 +400,8 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
     -O2
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -412,6 +414,8 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
     -O2
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -437,6 +441,8 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
     -O3
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -1406,6 +1412,8 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.manipulation_functions
   COMPILE_OPTIONS
     -O3
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -1418,6 +1426,8 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.manipulation_functions
   COMPILE_OPTIONS
     -O3
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -1443,6 +1453,8 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.manipulation_functions
   COMPILE_OPTIONS
     -O3
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -2202,6 +2214,8 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
     -O2
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -2214,6 +2228,8 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
     -O2
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -2239,6 +2255,8 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
     -O3
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -2450,6 +2468,8 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
     -O2
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -2462,6 +2482,8 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
     -O2
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -2487,6 +2509,8 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
     -O3
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
diff --git a/libc/src/math/generic/copysign.cpp b/libc/src/math/generic/copysign.cpp
index 149d725af08e2..186bb2c5983f4 100644
--- a/libc/src/math/generic/copysign.cpp
+++ b/libc/src/math/generic/copysign.cpp
@@ -14,7 +14,11 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(double, copysign, (double x, double y)) {
+#ifdef __LIBC_MISC_MATH_BASIC_OPS_OPT
+  return __builtin_copysign(x, y);
+#else
   return fputil::copysign(x, y);
+#endif
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/copysignf.cpp b/libc/src/math/generic/copysignf.cpp
index 17cd70d37c308..c79e50b61ebda 100644
--- a/libc/src/math/generic/copysignf.cpp
+++ b/libc/src/math/generic/copysignf.cpp
@@ -14,7 +14,11 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float, copysignf, (float x, float y)) {
+#ifdef __LIBC_MISC_MATH_BASIC_OPS_OPT
+  return __builtin_copysignf(x, y);
+#else
   return fputil::copysign(x, y);
+#endif
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/copysignf16.cpp b/libc/src/math/generic/copysignf16.cpp
index 42695b3b4a6de..546622f049ebe 100644
--- a/libc/src/math/generic/copysignf16.cpp
+++ b/libc/src/math/generic/copysignf16.cpp
@@ -14,7 +14,11 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float16, copysignf16, (float16 x, float16 y)) {
+#ifdef __LIBC_MISC_MATH_BASIC_OPS_OPT
+  return __builtin_copysignf16(x, y);
+#else
   return fputil::copysign(x, y);
+#endif
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fabs.cpp b/libc/src/math/generic/fabs.cpp
index 472297aecb2f7..55fa958cd7c00 100644
--- a/libc/src/math/generic/fabs.cpp
+++ b/libc/src/math/generic/fabs.cpp
@@ -13,6 +13,12 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
-LLVM_LIBC_FUNCTION(double, fabs, (double x)) { return fputil::abs(x); }
+LLVM_LIBC_FUNCTION(double, fabs, (double x)) {
+#ifdef __LIBC_MISC_MATH_BASIC_OPS_OPT
+  return __builtin_fabs(x);
+#else
+  return fputil::abs(x);
+#endif
+}
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fabsf.cpp b/libc/src/math/generic/fabsf.cpp
index ad4fcb30c795d..2ba18d09bbd5b 100644
--- a/libc/src/math/generic/fabsf.cpp
+++ b/libc/src/math/generic/fabsf.cpp
@@ -13,6 +13,12 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
-LLVM_LIBC_FUNCTION(float, fabsf, (float x)) { return fputil::abs(x); }
+LLVM_LIBC_FUNCTION(float, fabsf, (float x)) {
+#ifdef __LIBC_MISC_MATH_BASIC_OPS_OPT
+  return __builtin_fabsf(x);
+#else
+  return fputil::abs(x);
+#endif
+}
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fabsf16.cpp b/libc/src/math/generic/fabsf16.cpp
index 57671fb6067e2..2f982517614c4 100644
--- a/libc/src/math/generic/fabsf16.cpp
+++ b/libc/src/math/generic/fabsf16.cpp
@@ -13,6 +13,12 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
-LLVM_LIBC_FUNCTION(float16, fabsf16, (float16 x)) { return fputil::abs(x); }
+LLVM_LIBC_FUNCTION(float16, fabsf16, (float16 x)) {
+#ifdef __LIBC_MISC_MATH_BASIC_OPS_OPT
+  return __builtin_fabsf16(x);
+#else
+  return fputil::abs(x);
+#endif
+}
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fmaximum_num.cpp b/libc/src/math/generic/fmaximum_num.cpp
index 33df7daa380df..1bfc1514393ee 100644
--- a/libc/src/math/generic/fmaximum_num.cpp
+++ b/libc/src/math/generic/fmaximum_num.cpp
@@ -1,4 +1,4 @@
-//===-- Implementation of fmaximum_num function----------------------------===//
+//===-- Implementation of fmaximum_num function ---------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -14,7 +14,11 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(double, fmaximum_num, (double x, double y)) {
+#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
+  return __builtin_fmax(x, y);
+#else
   return fputil::fmaximum_num(x, y);
+#endif
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fmaximum_numf.cpp b/libc/src/math/generic/fmaximum_numf.cpp
index 1577080ba2c25..f8c69fa78be3d 100644
--- a/libc/src/math/generic/fmaximum_numf.cpp
+++ b/libc/src/math/generic/fmaximum_numf.cpp
@@ -1,4 +1,4 @@
-//===-- Implementation of fmaximum_numf function---------------------------===//
+//===-- Implementation of fmaximum_numf function --------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -14,7 +14,11 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float, fmaximum_numf, (float x, float y)) {
+#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
+  return __builtin_fmaxf(x, y);
+#else
   return fputil::fmaximum_num(x, y);
+#endif
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fmaximum_numf16.cpp b/libc/src/math/generic/fmaximum_numf16.cpp
index 394ce8b5fe4f3..6a012d38abea4 100644
--- a/libc/src/math/generic/fmaximum_numf16.cpp
+++ b/libc/src/math/generic/fmaximum_numf16.cpp
@@ -14,7 +14,11 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float16, fmaximum_numf16, (float16 x, float16 y)) {
+#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
+  return __builtin_fmaxf16(x, y);
+#else
   return fputil::fmaximum_num(x, y);
+#endif
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fminimum_num.cpp b/libc/src/math/generic/fminimum_num.cpp
index 3ff79def58075..5b9c426ca50c2 100644
--- a/libc/src/math/generic/fminimum_num.cpp
+++ b/libc/src/math/generic/fminimum_num.cpp
@@ -1,4 +1,4 @@
-//===-- Implementation of fminimum_num function----------------------------===//
+//===-- Implementation of fminimum_num function ---------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -14,7 +14,11 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(double, fminimum_num, (double x, double y)) {
+#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
+  return __builtin_fmin(x, y);
+#else
   return fputil::fminimum_num(x, y);
+#endif
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fminimum_numf.cpp b/libc/src/math/generic/fminimum_numf.cpp
index c7ac99b14bd5a..6b6f905e63de3 100644
--- a/libc/src/math/generic/fminimum_numf.cpp
+++ b/libc/src/math/generic/fminimum_numf.cpp
@@ -1,4 +1,4 @@
-//===-- Implementation of fminimum_numf function---------------------------===//
+//===-- Implementation of fminimum_numf function --------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -14,7 +14,11 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float, fminimum_numf, (float x, float y)) {
+#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
+  return __builtin_fminf(x, y);
+#else
   return fputil::fminimum_num(x, y);
+#endif
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fminimum_numf16.cpp b/libc/src/math/generic/fminimum_numf16.cpp
index 0af7205713c10..8e48aaf27070f 100644
--- a/libc/src/math/generic/fminimum_numf16.cpp
+++ b/libc/src/math/generic/fminimum_numf16.cpp
@@ -14,7 +14,11 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float16, fminimum_numf16, (float16 x, float16 y)) {
+#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
+  return __builtin_fminf16(x, y);
+#else
   return fputil::fminimum_num(x, y);
+#endif
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
index 995e41ba84b03..1ab0afbc9cbe8 100644
--- a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
+++ b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
@@ -10,6 +10,7 @@
 #include "src/__support/macros/config.h"
 #include "test/src/math/performance_testing/Timer.h"
 
+#include <algorithm>
 #include <cstddef>
 #include <fstream>
 
@@ -28,11 +29,11 @@ template <typename T> class BinaryOpSingleOutputPerf {
   static void run_perf_in_range(Func myFunc, Func otherFunc,
                                 StorageType startingBit, StorageType endingBit,
                                 size_t N, size_t rounds, std::ofstream &log) {
-    if (endingBit - startingBit < N)
-      N = endingBit - startingBit;
+    if (sizeof(StorageType) <= sizeof(size_t))
+      N = std::min(N, static_cast<size_t>(endingBit - startingBit));
 
     auto runner = [=](Func func) {
-      volatile T result;
+      [[maybe_unused]] volatile T result;
       if (endingBit < startingBit) {
         return;
       }
diff --git a/libc/test/src/math/performance_testing/CMakeLists.txt b/libc/test/src/math/performance_testing/CMakeLists.txt
index a75becba04d07..a4059c8ff4dd8 100644
--- a/libc/test/src/math/performance_testing/CMakeLists.txt
+++ b/libc/test/src/math/performance_testing/CMakeLists.txt
@@ -402,3 +402,23 @@ add_perf_binary(
   LINK_LIBRARIES
     LibcFPTestHelpers
 )
+
+add_perf_binary(
+  misc_basic_ops_perf
+  SRCS
+    misc_basic_ops_perf.cpp
+  DEPENDS
+    .binary_op_single_output_diff
+    .single_input_single_output_diff
+    libc.src.math.copysignf
+    libc.src.math.copysignf16
+    libc.src.math.fabsf
+    libc.src.math.fabsf16
+    libc.src.math.fmaximum_numf
+    libc.src.math.fmaximum_numf16
+    libc.src.math.fminimum_numf
+    libc.src.math.fminimum_numf16
+    libc.src.math.frexpf16
+  COMPILE_OPTIONS
+    -fno-builtin
+)
diff --git a/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h b/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h
index 48ae43d6315e3..e0beb729cb9f5 100644
--- a/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h
+++ b/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h
@@ -10,6 +10,7 @@
 #include "src/__support/macros/config.h"
 #include "test/src/math/performance_testing/Timer.h"
 
+#include <algorithm>
 #include <fstream>
 
 namespace LIBC_NAMESPACE_DECL {
@@ -26,16 +27,21 @@ template <typename T> class SingleInputSingleOutputPerf {
 
   static void runPerfInRange(Func myFunc, Func otherFunc,
                              StorageType startingBit, StorageType endingBit,
-                             std::ofstream &log) {
+                             size_t rounds, std::ofstream &log) {
+    size_t n = 10'010'001;
+    if (sizeof(StorageType) <= sizeof(size_t))
+      n = std::min(n, static_cast<size_t>(endingBit - startingBit));
+
     auto runner = [=](Func func) {
-      constexpr StorageType N = 10'010'001;
-      StorageType step = (endingBit - startingBit) / N;
+      StorageType step = (endingBit - startingBit) / n;
       if (step == 0)
         step = 1;
-      volatile T result;
-      for (StorageType bits = startingBit; bits < endingBit; bits += step) {
-        T x = FPBits(bits).get_val();
-        result = func(x);
+      [[maybe_unused]] volatile T result;
+      for (size_t i = 0; i < rounds; i++) {
+        for (StorageType bits = startingBit; bits < endingBit; bits += step) {
+          T x = FPBits(bits).get_val();
+          result = func(x);
+        }
       }
     };
 
@@ -44,8 +50,7 @@ template <typename T> class SingleInputSingleOutputPerf {
     runner(myFunc);
     timer.stop();
 
-    StorageType numberOfRuns = endingBit - startingBit + 1;
-    double myAverage = static_cast<double>(timer.nanoseconds()) / numberOfRuns;
+    double myAverage = static_cast<double>(timer.nanoseconds()) / n / rounds;
     log << "-- My function --\n";
     log << "     Total time      : " << timer.nanoseconds() << " ns \n";
     log << "     Average runtime : " << myAverage << " ns/op \n";
@@ -56,8 +61,7 @@ template <typename T> class SingleInputSingleOutputPerf {
     runner(otherFunc);
     timer.stop();
 
-    double otherAverage =
-        static_cast<double>(timer.nanoseconds()) / numberOfRuns;
+    double otherAverage = static_cast<double>(timer.nanoseconds()) / n / rounds;
     log << "-- Other function --\n";
     log << "     Total time      : " << timer.nanoseconds() << " ns \n";
     log << "     Average runtime : " << otherAverage << " ns/op \n";
@@ -68,15 +72,18 @@ template <typename T> class SingleInputSingleOutputPerf {
     log << "     Mine / Other's  : " << myAverage / otherAverage << " \n";
   }
 
-  static void runPerf(Func myFunc, Func otherFunc, const char *logFile) {
+  static void runPerf(Func myFunc, Func otherFunc, size_t rounds,
+                      const char *logFile) {
     std::ofstream log(logFile);
     log << " Performance tests with inputs in denormal range:\n";
     runPerfInRange(myFunc, otherFunc, /* startingBit= */ StorageType(0),
-                   /* endingBit= */ FPBits::max_subnormal().uintval(), log);
+                   /* endingBit= */ FPBits::max_subnormal().uintval(), rounds,
+                   log);
     log << "\n Performance tests with inputs in normal range:\n";
     runPerfInRange(myFunc, otherFunc,
                    /* startingBit= */ FPBits::min_normal().uintval(),
-                   /* endingBit= */ FPBits::max_normal().uintval(), log);
+                   /* endingBit= */ FPBits::max_normal().uintval(), rounds,
+                   log);
   }
 };
 
@@ -86,6 +93,13 @@ template <typename T> class SingleInputSingleOutputPerf {
 #define SINGLE_INPUT_SINGLE_OUTPUT_PERF(T, myFunc, otherFunc, filename)        \
   int main() {                                                                 \
     LIBC_NAMESPACE::testing::SingleInputSingleOutputPerf<T>::runPerf(          \
-        &myFunc, &otherFunc, filename);                                        \
+        &myFunc, &otherFunc, 1, filename);                                     \
     return 0;                                                                  \
   }
+
+#define SINGLE_INPUT_SINGLE_OUTPUT_PERF_EX(T, myFunc, otherFunc, rounds,       \
+                                           filename)                           \
+  {                                                                            \
+    LIBC_NAMESPACE::testing::SingleInputSingleOutputPerf<T>::runPerf(          \
+        &myFunc, &otherFunc, rounds, filename);                                \
+  }
diff --git a/libc/test/src/m...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/99037