[libc-commits] [libc] [libc][math] Improve performance test output (PR #134501)

Fri Apr 18 11:31:03 PDT 2025

https://github.com/meltq updated https://github.com/llvm/llvm-project/pull/134501

>From 6d87fee5fb980121866bc9df4e24d56d987146ee Mon Sep 17 00:00:00 2001
From: meltq <alissxlace at proton.me>
Date: Sat, 5 Apr 2025 16:24:36 +0530
Subject: [PATCH 1/4] Binary op perf output improvement

---
 .../BinaryOpSingleOutputPerf.h                | 51 ++++++++++---------
 1 file changed, 26 insertions(+), 25 deletions(-)

diff --git a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
index 98a1813bd7b54..8001710d83f5e 100644
--- a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
+++ b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
@@ -26,7 +26,7 @@ class BinaryOpSingleOutputPerf {
 public:
   typedef OutputType Func(InputType, InputType);
 
-  static void run_perf_in_range(Func myFunc, Func otherFunc,
+  static void run_perf_in_range(Func FuncA, Func FuncB,
                                 StorageType startingBit, StorageType endingBit,
                                 size_t N, size_t rounds, std::ofstream &log) {
     if (sizeof(StorageType) <= sizeof(size_t))
@@ -54,48 +54,48 @@ class BinaryOpSingleOutputPerf {
 
     Timer timer;
     timer.start();
-    runner(myFunc);
+    runner(FuncA);
     timer.stop();
 
-    double my_average = static_cast<double>(timer.nanoseconds()) / N / rounds;
-    log << "-- My function --\n";
+    double a_average = static_cast<double>(timer.nanoseconds()) / N / rounds;
+    log << "-- Function A --\n";
     log << "     Total time      : " << timer.nanoseconds() << " ns \n";
-    log << "     Average runtime : " << my_average << " ns/op \n";
+    log << "     Average runtime : " << a_average << " ns/op \n";
     log << "     Ops per second  : "
-        << static_cast<uint64_t>(1'000'000'000.0 / my_average) << " op/s \n";
+        << static_cast<uint64_t>(1'000'000'000.0 / a_average) << " op/s \n";
 
     timer.start();
-    runner(otherFunc);
+    runner(FuncB);
     timer.stop();
 
-    double other_average =
-        static_cast<double>(timer.nanoseconds()) / N / rounds;
-    log << "-- Other function --\n";
+    double b_average = static_cast<double>(timer.nanoseconds()) / N / rounds;
+    log << "-- Function B --\n";
     log << "     Total time      : " << timer.nanoseconds() << " ns \n";
-    log << "     Average runtime : " << other_average << " ns/op \n";
+    log << "     Average runtime : " << b_average << " ns/op \n";
     log << "     Ops per second  : "
-        << static_cast<uint64_t>(1'000'000'000.0 / other_average) << " op/s \n";
+        << static_cast<uint64_t>(1'000'000'000.0 / b_average) << " op/s \n";
 
-    log << "-- Average runtime ratio --\n";
-    log << "     Mine / Other's  : " << my_average / other_average << " \n";
+    log << "-- Average ops per second ratio --\n";
+    log << "     A / B  : " << b_average / a_average << " \n";
   }
 
-  static void run_perf(Func myFunc, Func otherFunc, int rounds,
-                       const char *logFile) {
+  static void run_perf(Func FuncA, Func FuncB, int rounds, const char *name_a,
+                       const char *name_b, const char *logFile) {
     std::ofstream log(logFile);
+    log << "Function A - " << name_a << " Function B - " << name_b << "\n";
     log << " Performance tests with inputs in denormal range:\n";
-    run_perf_in_range(myFunc, otherFunc, /* startingBit= */ StorageType(0),
+    run_perf_in_range(FuncA, FuncB, /* startingBit= */ StorageType(0),
                       /* endingBit= */ FPBits::max_subnormal().uintval(),
                       1'000'001, rounds, log);
     log << "\n Performance tests with inputs in normal range:\n";
-    run_perf_in_range(myFunc, otherFunc,
+    run_perf_in_range(FuncA, FuncB,
                       /* startingBit= */ FPBits::min_normal().uintval(),
                       /* endingBit= */ FPBits::max_normal().uintval(),
                       1'000'001, rounds, log);
     log << "\n Performance tests with inputs in normal range with exponents "
            "close to each other:\n";
     run_perf_in_range(
-        myFunc, otherFunc,
+        FuncA, FuncB,
         /* startingBit= */ FPBits(OutputType(0x1.0p-10)).uintval(),
         /* endingBit= */ FPBits(OutputType(0x1.0p+10)).uintval(), 1'000'001,
         rounds, log);
@@ -128,21 +128,22 @@ class BinaryOpSingleOutputPerf {
 } // namespace testing
 } // namespace LIBC_NAMESPACE_DECL
 
-#define BINARY_OP_SINGLE_OUTPUT_PERF(OutputType, InputType, myFunc, otherFunc, \
+#define BINARY_OP_SINGLE_OUTPUT_PERF(OutputType, InputType, FuncA, FuncB,      \
                                      filename)                                 \
   int main() {                                                                 \
     LIBC_NAMESPACE::testing::BinaryOpSingleOutputPerf<                         \
-        OutputType, InputType>::run_perf(&myFunc, &otherFunc, 1, filename);    \
+        OutputType, InputType>::run_perf(&FuncA, &FuncB, 1, #FuncA, #FuncB,    \
+                                         filename);                            \
     return 0;                                                                  \
   }
 
-#define BINARY_OP_SINGLE_OUTPUT_PERF_EX(OutputType, InputType, myFunc,         \
-                                        otherFunc, rounds, filename)           \
+#define BINARY_OP_SINGLE_OUTPUT_PERF_EX(OutputType, InputType, FuncA, FuncB,   \
+                                        rounds, filename)                      \
   {                                                                            \
     LIBC_NAMESPACE::testing::BinaryOpSingleOutputPerf<                         \
-        OutputType, InputType>::run_perf(&myFunc, &otherFunc, rounds,          \
+        OutputType, InputType>::run_perf(&FuncA, &FuncB, 1, #FuncA, #FuncB,    \
                                          filename);                            \
     LIBC_NAMESPACE::testing::BinaryOpSingleOutputPerf<                         \
-        OutputType, InputType>::run_perf(&myFunc, &otherFunc, rounds,          \
+        OutputType, InputType>::run_perf(&FuncA, &FuncB, 1, #FuncA, #FuncB,    \
                                          filename);                            \
   }

>From 441efdf9b6f5b8385b34768b484ac1b870256a77 Mon Sep 17 00:00:00 2001
From: meltq <alissxlace at proton.me>
Date: Sat, 5 Apr 2025 23:26:24 +0530
Subject: [PATCH 2/4] Single input perf output improvement

---
 .../SingleInputSingleOutputPerf.h             | 43 ++++++++++---------
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h b/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h
index efad1259d6bf1..93c217de250e6 100644
--- a/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h
+++ b/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h
@@ -25,7 +25,7 @@ template <typename T> class SingleInputSingleOutputPerf {
 public:
   typedef T Func(T);
 
-  static void runPerfInRange(Func myFunc, Func otherFunc,
+  static void runPerfInRange(Func FuncA, Func FuncB,
                              StorageType startingBit, StorageType endingBit,
                              size_t rounds, std::ofstream &log) {
     size_t n = 10'010'001;
@@ -47,40 +47,41 @@ template <typename T> class SingleInputSingleOutputPerf {
 
     Timer timer;
     timer.start();
-    runner(myFunc);
+    runner(FuncA);
     timer.stop();
 
-    double myAverage = static_cast<double>(timer.nanoseconds()) / n / rounds;
-    log << "-- My function --\n";
+    double a_average = static_cast<double>(timer.nanoseconds()) / n / rounds;
+    log << "-- Function A --\n";
     log << "     Total time      : " << timer.nanoseconds() << " ns \n";
-    log << "     Average runtime : " << myAverage << " ns/op \n";
+    log << "     Average runtime : " << a_average << " ns/op \n";
     log << "     Ops per second  : "
-        << static_cast<uint64_t>(1'000'000'000.0 / myAverage) << " op/s \n";
+        << static_cast<uint64_t>(1'000'000'000.0 / a_average) << " op/s \n";
 
     timer.start();
-    runner(otherFunc);
+    runner(FuncB);
     timer.stop();
 
-    double otherAverage = static_cast<double>(timer.nanoseconds()) / n / rounds;
-    log << "-- Other function --\n";
+    double b_average = static_cast<double>(timer.nanoseconds()) / n / rounds;
+    log << "-- Function B --\n";
     log << "     Total time      : " << timer.nanoseconds() << " ns \n";
-    log << "     Average runtime : " << otherAverage << " ns/op \n";
+    log << "     Average runtime : " << b_average << " ns/op \n";
     log << "     Ops per second  : "
-        << static_cast<uint64_t>(1'000'000'000.0 / otherAverage) << " op/s \n";
+        << static_cast<uint64_t>(1'000'000'000.0 / b_average) << " op/s \n";
 
-    log << "-- Average runtime ratio --\n";
-    log << "     Mine / Other's  : " << myAverage / otherAverage << " \n";
+    log << "-- Average ops per second ratio --\n";
+    log << "     A / B  : " << b_average / a_average << " \n";
   }
 
-  static void runPerf(Func myFunc, Func otherFunc, size_t rounds,
-                      const char *logFile) {
+  static void runPerf(Func FuncA, Func FuncB, size_t rounds, const char *name_a,
+                      const char *name_b, const char *logFile) {
     std::ofstream log(logFile);
+    log << "Function A - " << name_a << " Function B - " << name_b << "\n";
     log << " Performance tests with inputs in denormal range:\n";
-    runPerfInRange(myFunc, otherFunc, /* startingBit= */ StorageType(0),
+    runPerfInRange(FuncA, FuncB, /* startingBit= */ StorageType(0),
                    /* endingBit= */ FPBits::max_subnormal().uintval(), rounds,
                    log);
     log << "\n Performance tests with inputs in normal range:\n";
-    runPerfInRange(myFunc, otherFunc,
+    runPerfInRange(FuncA, FuncB,
                    /* startingBit= */ FPBits::min_normal().uintval(),
                    /* endingBit= */ FPBits::max_normal().uintval(), rounds,
                    log);
@@ -90,16 +91,16 @@ template <typename T> class SingleInputSingleOutputPerf {
 } // namespace testing
 } // namespace LIBC_NAMESPACE_DECL
 
-#define SINGLE_INPUT_SINGLE_OUTPUT_PERF(T, myFunc, otherFunc, filename)        \
+#define SINGLE_INPUT_SINGLE_OUTPUT_PERF(T, FuncA, FuncB, filename)             \
   int main() {                                                                 \
     LIBC_NAMESPACE::testing::SingleInputSingleOutputPerf<T>::runPerf(          \
-        &myFunc, &otherFunc, 1, filename);                                     \
+        &FuncA, &FuncB, 1, #FuncA, #FuncB, filename);                          \
     return 0;                                                                  \
   }
 
-#define SINGLE_INPUT_SINGLE_OUTPUT_PERF_EX(T, myFunc, otherFunc, rounds,       \
+#define SINGLE_INPUT_SINGLE_OUTPUT_PERF_EX(T, FuncA, FuncB, rounds,            \
                                            filename)                           \
   {                                                                            \
     LIBC_NAMESPACE::testing::SingleInputSingleOutputPerf<T>::runPerf(          \
-        &myFunc, &otherFunc, rounds, filename);                                \
+        &FuncA, &FuncB, rounds, #FuncA, #FuncB, filename);                     \
   }

>From 714f396c7642af3e75a11ea3f78a42c9de2501f3 Mon Sep 17 00:00:00 2001
From: meltq <alissxlace at proton.me>
Date: Sun, 6 Apr 2025 00:45:37 +0530
Subject: [PATCH 3/4] Formatting changes

---
 .../math/performance_testing/BinaryOpSingleOutputPerf.h  | 6 +++---
 .../performance_testing/SingleInputSingleOutputPerf.h    | 9 ++++-----
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
index 8001710d83f5e..6068f1ea5f576 100644
--- a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
+++ b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
@@ -26,9 +26,9 @@ class BinaryOpSingleOutputPerf {
 public:
   typedef OutputType Func(InputType, InputType);
 
-  static void run_perf_in_range(Func FuncA, Func FuncB,
-                                StorageType startingBit, StorageType endingBit,
-                                size_t N, size_t rounds, std::ofstream &log) {
+  static void run_perf_in_range(Func FuncA, Func FuncB, StorageType startingBit,
+                                StorageType endingBit, size_t N, size_t rounds,
+                                std::ofstream &log) {
     if (sizeof(StorageType) <= sizeof(size_t))
       N = cpp::min(N, static_cast<size_t>(endingBit - startingBit));
 
diff --git a/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h b/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h
index 93c217de250e6..a434123a3f296 100644
--- a/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h
+++ b/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h
@@ -25,9 +25,9 @@ template <typename T> class SingleInputSingleOutputPerf {
 public:
   typedef T Func(T);
 
-  static void runPerfInRange(Func FuncA, Func FuncB,
-                             StorageType startingBit, StorageType endingBit,
-                             size_t rounds, std::ofstream &log) {
+  static void runPerfInRange(Func FuncA, Func FuncB, StorageType startingBit,
+                             StorageType endingBit, size_t rounds,
+                             std::ofstream &log) {
     size_t n = 10'010'001;
     if (sizeof(StorageType) <= sizeof(size_t))
       n = cpp::min(n, static_cast<size_t>(endingBit - startingBit));
@@ -98,8 +98,7 @@ template <typename T> class SingleInputSingleOutputPerf {
     return 0;                                                                  \
   }
 
-#define SINGLE_INPUT_SINGLE_OUTPUT_PERF_EX(T, FuncA, FuncB, rounds,            \
-                                           filename)                           \
+#define SINGLE_INPUT_SINGLE_OUTPUT_PERF_EX(T, FuncA, FuncB, rounds, filename)  \
   {                                                                            \
     LIBC_NAMESPACE::testing::SingleInputSingleOutputPerf<T>::runPerf(          \
         &FuncA, &FuncB, rounds, #FuncA, #FuncB, filename);                     \

>From 9bb2e8a24d84c2c07c8b3bb9c36b5749a56f83c2 Mon Sep 17 00:00:00 2001
From: meltq <alissxlace at proton.me>
Date: Thu, 17 Apr 2025 20:10:30 +0530
Subject: [PATCH 4/4] Add PerfTest and remove old files

---
 .../BinaryOpSingleOutputPerf.h                | 149 ----------------
 .../math/performance_testing/CMakeLists.txt   |  83 ++++-----
 .../src/math/performance_testing/PerfTest.h   | 159 ++++++++++++++++++
 .../SingleInputSingleOutputPerf.h             | 105 ------------
 .../math/performance_testing/ceilf_perf.cpp   |  10 +-
 .../math/performance_testing/cosf_perf.cpp    |  10 +-
 .../performance_testing/exp10f16_perf.cpp     |   3 +-
 .../math/performance_testing/exp2f16_perf.cpp |   3 +-
 .../math/performance_testing/exp2f_perf.cpp   |  10 +-
 .../math/performance_testing/expf16_perf.cpp  |   3 +-
 .../math/performance_testing/expf_perf.cpp    |  10 +-
 .../math/performance_testing/expm1f_perf.cpp  |  10 +-
 .../math/performance_testing/fabsf_perf.cpp   |  10 +-
 .../math/performance_testing/floorf_perf.cpp  |  10 +-
 .../math/performance_testing/fmod_perf.cpp    |  10 +-
 .../performance_testing/fmodf128_perf.cpp     |  11 +-
 .../math/performance_testing/fmodf16_perf.cpp |  14 +-
 .../math/performance_testing/fmodf_perf.cpp   |  10 +-
 .../math/performance_testing/fmodl_perf.cpp   |  11 +-
 .../math/performance_testing/fmul_perf.cpp    |   8 +-
 .../math/performance_testing/fmull_perf.cpp   |   8 +-
 .../math/performance_testing/hypot_perf.cpp   |  10 +-
 .../performance_testing/hypotf16_perf.cpp     |  11 +-
 .../math/performance_testing/hypotf_perf.cpp  |   9 +-
 .../math/performance_testing/log10f_perf.cpp  |  10 +-
 .../math/performance_testing/log1pf_perf.cpp  |  10 +-
 .../math/performance_testing/log2f_perf.cpp   |  10 +-
 .../math/performance_testing/logbf_perf.cpp   |  10 +-
 .../math/performance_testing/logf_perf.cpp    |  10 +-
 .../max_min_funcs_perf.cpp                    |  63 ++++---
 .../misc_basic_ops_perf.cpp                   |  14 +-
 .../performance_testing/nearbyintf_perf.cpp   |  10 +-
 .../nearest_integer_funcs_perf.cpp            |  26 +--
 .../math/performance_testing/rintf_perf.cpp   |  10 +-
 .../math/performance_testing/roundf_perf.cpp  |  10 +-
 .../math/performance_testing/sinf_perf.cpp    |  10 +-
 .../performance_testing/sqrtf128_perf.cpp     |  10 +-
 .../math/performance_testing/sqrtf_perf.cpp   |  10 +-
 .../math/performance_testing/truncf_perf.cpp  |  10 +-
 39 files changed, 424 insertions(+), 476 deletions(-)
 delete mode 100644 libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
 create mode 100644 libc/test/src/math/performance_testing/PerfTest.h
 delete mode 100644 libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h

diff --git a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
deleted file mode 100644
index 6068f1ea5f576..0000000000000
--- a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
+++ /dev/null
@@ -1,149 +0,0 @@
-//===-- Common utility class for differential analysis --------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "src/__support/CPP/algorithm.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/macros/config.h"
-#include "test/src/math/performance_testing/Timer.h"
-
-#include <cstddef>
-#include <fstream>
-
-namespace LIBC_NAMESPACE_DECL {
-namespace testing {
-template <typename OutputType, typename InputType>
-class BinaryOpSingleOutputPerf {
-  using FPBits = fputil::FPBits<OutputType>;
-  using StorageType = typename FPBits::StorageType;
-  static constexpr StorageType UIntMax =
-      cpp::numeric_limits<StorageType>::max();
-
-public:
-  typedef OutputType Func(InputType, InputType);
-
-  static void run_perf_in_range(Func FuncA, Func FuncB, StorageType startingBit,
-                                StorageType endingBit, size_t N, size_t rounds,
-                                std::ofstream &log) {
-    if (sizeof(StorageType) <= sizeof(size_t))
-      N = cpp::min(N, static_cast<size_t>(endingBit - startingBit));
-
-    auto runner = [=](Func func) {
-      [[maybe_unused]] volatile OutputType result;
-      if (endingBit < startingBit) {
-        return;
-      }
-
-      StorageType step = (endingBit - startingBit) / N;
-      for (size_t i = 0; i < rounds; i++) {
-        for (StorageType bitsX = startingBit, bitsY = endingBit;;
-             bitsX += step, bitsY -= step) {
-          InputType x = FPBits(bitsX).get_val();
-          InputType y = FPBits(bitsY).get_val();
-          result = func(x, y);
-          if (endingBit - bitsX < step) {
-            break;
-          }
-        }
-      }
-    };
-
-    Timer timer;
-    timer.start();
-    runner(FuncA);
-    timer.stop();
-
-    double a_average = static_cast<double>(timer.nanoseconds()) / N / rounds;
-    log << "-- Function A --\n";
-    log << "     Total time      : " << timer.nanoseconds() << " ns \n";
-    log << "     Average runtime : " << a_average << " ns/op \n";
-    log << "     Ops per second  : "
-        << static_cast<uint64_t>(1'000'000'000.0 / a_average) << " op/s \n";
-
-    timer.start();
-    runner(FuncB);
-    timer.stop();
-
-    double b_average = static_cast<double>(timer.nanoseconds()) / N / rounds;
-    log << "-- Function B --\n";
-    log << "     Total time      : " << timer.nanoseconds() << " ns \n";
-    log << "     Average runtime : " << b_average << " ns/op \n";
-    log << "     Ops per second  : "
-        << static_cast<uint64_t>(1'000'000'000.0 / b_average) << " op/s \n";
-
-    log << "-- Average ops per second ratio --\n";
-    log << "     A / B  : " << b_average / a_average << " \n";
-  }
-
-  static void run_perf(Func FuncA, Func FuncB, int rounds, const char *name_a,
-                       const char *name_b, const char *logFile) {
-    std::ofstream log(logFile);
-    log << "Function A - " << name_a << " Function B - " << name_b << "\n";
-    log << " Performance tests with inputs in denormal range:\n";
-    run_perf_in_range(FuncA, FuncB, /* startingBit= */ StorageType(0),
-                      /* endingBit= */ FPBits::max_subnormal().uintval(),
-                      1'000'001, rounds, log);
-    log << "\n Performance tests with inputs in normal range:\n";
-    run_perf_in_range(FuncA, FuncB,
-                      /* startingBit= */ FPBits::min_normal().uintval(),
-                      /* endingBit= */ FPBits::max_normal().uintval(),
-                      1'000'001, rounds, log);
-    log << "\n Performance tests with inputs in normal range with exponents "
-           "close to each other:\n";
-    run_perf_in_range(
-        FuncA, FuncB,
-        /* startingBit= */ FPBits(OutputType(0x1.0p-10)).uintval(),
-        /* endingBit= */ FPBits(OutputType(0x1.0p+10)).uintval(), 1'000'001,
-        rounds, log);
-  }
-
-  static void run_diff(Func myFunc, Func otherFunc, const char *logFile) {
-    uint64_t diffCount = 0;
-    std::ofstream log(logFile);
-    log << " Diff tests with inputs in denormal range:\n";
-    diffCount += run_diff_in_range(
-        myFunc, otherFunc, /* startingBit= */ StorageType(0),
-        /* endingBit= */ FPBits::max_subnormal().uintval(), 1'000'001, log);
-    log << "\n Diff tests with inputs in normal range:\n";
-    diffCount += run_diff_in_range(
-        myFunc, otherFunc,
-        /* startingBit= */ FPBits::min_normal().uintval(),
-        /* endingBit= */ FPBits::max_normal().uintval(), 100'000'001, log);
-    log << "\n Diff tests with inputs in normal range with exponents "
-           "close to each other:\n";
-    diffCount += run_diff_in_range(
-        myFunc, otherFunc,
-        /* startingBit= */ FPBits(OutputType(0x1.0p-10)).uintval(),
-        /* endingBit= */ FPBits(OutputType(0x1.0p+10)).uintval(), 10'000'001,
-        log);
-
-    log << "Total number of differing results: " << diffCount << '\n';
-  }
-};
-
-} // namespace testing
-} // namespace LIBC_NAMESPACE_DECL
-
-#define BINARY_OP_SINGLE_OUTPUT_PERF(OutputType, InputType, FuncA, FuncB,      \
-                                     filename)                                 \
-  int main() {                                                                 \
-    LIBC_NAMESPACE::testing::BinaryOpSingleOutputPerf<                         \
-        OutputType, InputType>::run_perf(&FuncA, &FuncB, 1, #FuncA, #FuncB,    \
-                                         filename);                            \
-    return 0;                                                                  \
-  }
-
-#define BINARY_OP_SINGLE_OUTPUT_PERF_EX(OutputType, InputType, FuncA, FuncB,   \
-                                        rounds, filename)                      \
-  {                                                                            \
-    LIBC_NAMESPACE::testing::BinaryOpSingleOutputPerf<                         \
-        OutputType, InputType>::run_perf(&FuncA, &FuncB, 1, #FuncA, #FuncB,    \
-                                         filename);                            \
-    LIBC_NAMESPACE::testing::BinaryOpSingleOutputPerf<                         \
-        OutputType, InputType>::run_perf(&FuncA, &FuncB, 1, #FuncA, #FuncB,    \
-                                         filename);                            \
-  }
diff --git a/libc/test/src/math/performance_testing/CMakeLists.txt b/libc/test/src/math/performance_testing/CMakeLists.txt
index d8f87e04c15d8..618a6d037a63f 100644
--- a/libc/test/src/math/performance_testing/CMakeLists.txt
+++ b/libc/test/src/math/performance_testing/CMakeLists.txt
@@ -92,18 +92,9 @@ function(add_perf_binary target_name)
 endfunction()
 
 add_header_library(
-  single_input_single_output_diff
+  perf_test
   HDRS
-    SingleInputSingleOutputPerf.h
-  DEPENDS
-    libc.src.__support.CPP.algorithm
-    libc.src.__support.FPUtil.fp_bits
-)
-
-add_header_library(
-  binary_op_single_output_diff
-  HDRS
-    BinaryOpSingleOutputPerf.h
+    PerfTest.h
   DEPENDS
     libc.src.__support.CPP.algorithm
     libc.src.__support.FPUtil.fp_bits
@@ -114,7 +105,7 @@ add_perf_binary(
   SRCS
     sinf_perf.cpp
   DEPENDS
-    .single_input_single_output_diff
+    .perf_test
     libc.src.math.sinf
   COMPILE_OPTIONS
     -fno-builtin
@@ -125,7 +116,7 @@ add_perf_binary(
   SRCS
     cosf_perf.cpp
   DEPENDS
-    .single_input_single_output_diff
+    .perf_test
     libc.src.math.cosf
   COMPILE_OPTIONS
     -fno-builtin
@@ -136,7 +127,7 @@ add_perf_binary(
   SRCS
     expm1f_perf.cpp
   DEPENDS
-    .single_input_single_output_diff
+    .perf_test
     libc.src.math.expm1f
   COMPILE_OPTIONS
     -fno-builtin
@@ -147,7 +138,7 @@ add_perf_binary(
   SRCS
     ceilf_perf.cpp
   DEPENDS
-    .single_input_single_output_diff
+    .perf_test
     libc.src.math.ceilf
   COMPILE_OPTIONS
     -fno-builtin
@@ -158,7 +149,7 @@ add_perf_binary(
   SRCS
     exp10f16_perf.cpp
   DEPENDS
-    .single_input_single_output_diff
+    .perf_test
     libc.src.math.exp10f16
   COMPILE_OPTIONS
     -fno-builtin
@@ -169,7 +160,7 @@ add_perf_binary(
   SRCS
     exp2f_perf.cpp
   DEPENDS
-    .single_input_single_output_diff
+    .perf_test
     libc.src.math.exp2f
   COMPILE_OPTIONS
     -fno-builtin
@@ -180,7 +171,7 @@ add_perf_binary(
   SRCS
     exp2f16_perf.cpp
   DEPENDS
-    .single_input_single_output_diff
+    .perf_test
     libc.src.math.exp2f16
   COMPILE_OPTIONS
     -fno-builtin
@@ -191,7 +182,7 @@ add_perf_binary(
   SRCS
     expf_perf.cpp
   DEPENDS
-    .single_input_single_output_diff
+    .perf_test
     libc.src.math.expf
   COMPILE_OPTIONS
     -fno-builtin
@@ -202,7 +193,7 @@ add_perf_binary(
   SRCS
     expf16_perf.cpp
   DEPENDS
-    .single_input_single_output_diff
+    .perf_test
     libc.src.math.expf16
   COMPILE_OPTIONS
     -fno-builtin
@@ -213,7 +204,7 @@ add_perf_binary(
   SRCS
     fabsf_perf.cpp
   DEPENDS
-    .single_input_single_output_diff
+    .perf_test
     libc.src.math.fabsf
   COMPILE_OPTIONS
     -fno-builtin
@@ -224,7 +215,7 @@ add_perf_binary(
   SRCS
     floorf_perf.cpp
   DEPENDS
-    .single_input_single_output_diff
+    .perf_test
     libc.src.math.floorf
   COMPILE_OPTIONS
     -fno-builtin
@@ -235,7 +226,7 @@ add_perf_binary(
   SRCS
     log10f_perf.cpp
   DEPENDS
-    .single_input_single_output_diff
+    .perf_test
     libc.src.math.log10f
   COMPILE_OPTIONS
     -fno-builtin
@@ -246,7 +237,7 @@ add_perf_binary(
   SRCS
     log1pf_perf.cpp
   DEPENDS
-    .single_input_single_output_diff
+    .perf_test
     libc.src.math.log1pf
   COMPILE_OPTIONS
     -fno-builtin
@@ -257,7 +248,7 @@ add_perf_binary(
   SRCS
     log2f_perf.cpp
   DEPENDS
-    .single_input_single_output_diff
+    .perf_test
     libc.src.math.log2f
   COMPILE_OPTIONS
     -fno-builtin
@@ -268,7 +259,7 @@ add_perf_binary(
   SRCS
     logf_perf.cpp
   DEPENDS
-    .single_input_single_output_diff
+    .perf_test
     libc.src.math.logf
   COMPILE_OPTIONS
     -fno-builtin
@@ -279,7 +270,7 @@ add_perf_binary(
   SRCS
     logbf_perf.cpp
   DEPENDS
-    .single_input_single_output_diff
+    .perf_test
     libc.src.math.logbf
   COMPILE_OPTIONS
     -fno-builtin
@@ -290,7 +281,7 @@ add_perf_binary(
   SRCS
     nearbyintf_perf.cpp
   DEPENDS
-    .single_input_single_output_diff
+    .perf_test
     libc.src.math.nearbyintf
   COMPILE_OPTIONS
     -fno-builtin
@@ -301,7 +292,7 @@ add_perf_binary(
   SRCS
     rintf_perf.cpp
   DEPENDS
-    .single_input_single_output_diff
+    .perf_test
     libc.src.math.rintf
   COMPILE_OPTIONS
     -fno-builtin
@@ -312,7 +303,7 @@ add_perf_binary(
   SRCS
     roundf_perf.cpp
   DEPENDS
-    .single_input_single_output_diff
+    .perf_test
     libc.src.math.roundf
   COMPILE_OPTIONS
     -fno-builtin
@@ -323,7 +314,7 @@ add_perf_binary(
   SRCS
     sqrtf_perf.cpp
   DEPENDS
-    .single_input_single_output_diff
+    .perf_test
     libc.src.math.sqrtf
   COMPILE_OPTIONS
     -fno-builtin
@@ -334,7 +325,7 @@ add_perf_binary(
   SRCS
     truncf_perf.cpp
   DEPENDS
-    .single_input_single_output_diff
+    .perf_test
     libc.src.math.truncf
   COMPILE_OPTIONS
     -fno-builtin
@@ -345,7 +336,7 @@ add_perf_binary(
   SRCS
     hypotf16_perf.cpp
   DEPENDS
-    .binary_op_single_output_diff
+    .perf_test
     libc.src.math.hypotf16
     libc.src.__support.FPUtil.fp_bits
   COMPILE_OPTIONS
@@ -357,7 +348,7 @@ add_perf_binary(
   SRCS
     hypotf_perf.cpp
   DEPENDS
-    .binary_op_single_output_diff
+    .perf_test
     libc.src.math.hypotf
   COMPILE_OPTIONS
     -fno-builtin
@@ -368,7 +359,7 @@ add_perf_binary(
   SRCS
     hypot_perf.cpp
   DEPENDS
-    .binary_op_single_output_diff
+    .perf_test
     libc.src.math.hypot
   COMPILE_OPTIONS
     -fno-builtin
@@ -379,7 +370,7 @@ add_perf_binary(
   SRCS
     fmodf_perf.cpp
   DEPENDS
-    .single_input_single_output_diff
+    .perf_test
     libc.src.math.fmodf
   COMPILE_OPTIONS
     -fno-builtin
@@ -390,7 +381,7 @@ add_perf_binary(
   SRCS
     fmod_perf.cpp
   DEPENDS
-    .single_input_single_output_diff
+    .perf_test
     libc.src.math.fmod
   COMPILE_OPTIONS
     -fno-builtin
@@ -401,7 +392,7 @@ add_perf_binary(
   SRCS
     fmodl_perf.cpp
   DEPENDS
-    .single_input_single_output_diff
+    .perf_test
     libc.src.math.fmodl
   COMPILE_OPTIONS
     -fno-builtin
@@ -412,7 +403,7 @@ add_perf_binary(
   SRCS
     fmodf16_perf.cpp
   DEPENDS
-    .binary_op_single_output_diff
+    .perf_test
     libc.src.math.fmodf16
     libc.src.__support.FPUtil.generic.fmod
     libc.src.__support.macros.properties.types
@@ -423,8 +414,9 @@ add_perf_binary(
   SRCS
     fmodf128_perf.cpp
   DEPENDS
-    .single_input_single_output_diff
+    .perf_test
     libc.src.math.fmodf128
+    libc.src.__support.macros.properties.types
   COMPILE_OPTIONS
     -fno-builtin
 )
@@ -457,8 +449,7 @@ add_perf_binary(
   SRCS
     misc_basic_ops_perf.cpp
   DEPENDS
-    .binary_op_single_output_diff
-    .single_input_single_output_diff
+    .perf_test
     libc.src.math.copysignf
     libc.src.math.copysignf16
     libc.src.math.fabsf
@@ -472,7 +463,7 @@ add_perf_binary(
   SRCS
     max_min_funcs_perf.cpp
   DEPENDS
-    .binary_op_single_output_diff
+    .perf_test
     libc.src.math.fmaxf
     libc.src.math.fmaxf16
     libc.src.math.fmaximumf
@@ -494,7 +485,7 @@ add_perf_binary(
   SRCS
     fmul_perf.cpp
   DEPENDS
-    .binary_op_single_output_diff
+    .perf_test
     libc.src.math.fmul
     libc.src.__support.FPUtil.generic.mul
     libc.src.__support.FPUtil.fp_bits
@@ -507,7 +498,7 @@ add_perf_binary(
   SRCS
     fmull_perf.cpp
   DEPENDS
-    .binary_op_single_output_diff
+    .perf_test
     libc.src.math.fmull
   COMPILE_OPTIONS
     -fno-builtin
@@ -518,6 +509,6 @@ add_perf_binary(
   SRCS
     sqrtf128_perf.cpp
   DEPENDS
-    .single_input_single_output_diff
+    .perf_test
     libc.src.math.sqrtf128
 )
diff --git a/libc/test/src/math/performance_testing/PerfTest.h b/libc/test/src/math/performance_testing/PerfTest.h
new file mode 100644
index 0000000000000..3cc6b2411aeab
--- /dev/null
+++ b/libc/test/src/math/performance_testing/PerfTest.h
@@ -0,0 +1,159 @@
+//===-- Common utility class for performance testing ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/CPP/algorithm.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/macros/config.h"
+#include "test/src/math/performance_testing/Timer.h"
+
+#include <cstddef>
+#include <fstream>
+
+namespace LIBC_NAMESPACE_DECL {
+namespace testing {
+// Times two implementations of a math function over identical bit-pattern
+// sweeps and logs per-function averages together with their throughput ratio.
+template <typename OutputType, typename InputType> class PerfTest {
+  using FPBits = fputil::FPBits<OutputType>;
+  using StorageType = typename FPBits::StorageType;
+
+public:
+  using BinaryFuncPtr = OutputType (*)(InputType, InputType);
+  using UnaryFuncPtr = OutputType (*)(InputType);
+
+  // Runs FuncA, then FuncB, `rounds` times over inputs whose bit patterns
+  // step from startingBit towards endingBit in about N increments, and writes
+  // each function's total time, ns/op and op/s plus their ratio to `log`.
+  // When `binary` is true the second argument's pattern steps down from
+  // endingBit; patterns are decoded via FPBits<OutputType>.
+  template <bool binary, typename Func>
+  static void run_perf_in_range(Func FuncA, Func FuncB, StorageType startingBit,
+                                StorageType endingBit, size_t N, size_t rounds,
+                                const char *name_a, const char *name_b,
+                                std::ofstream &log) {
+    if (sizeof(StorageType) <= sizeof(size_t))
+      N = cpp::min(N, static_cast<size_t>(endingBit - startingBit));
+    // An empty range (or a caller-supplied N of 0) would otherwise divide by
+    // zero when the step and the averages are derived from N below.
+    if (N == 0)
+      N = 1;
+
+    auto runner = [=](Func func) {
+      [[maybe_unused]] volatile OutputType result;
+      if (endingBit < startingBit)
+        return;
+
+      StorageType step = (endingBit - startingBit) / N;
+      if (step == 0)
+        step = 1;
+      for (size_t i = 0; i < rounds; i++) {
+        for (StorageType bits_x = startingBit, bits_y = endingBit;;
+             bits_x += step, bits_y -= step) {
+          InputType x = FPBits(bits_x).get_val();
+          if constexpr (binary) {
+            InputType y = FPBits(bits_y).get_val();
+            result = func(x, y);
+          } else {
+            result = func(x);
+          }
+          // Stop once another step would carry bits_x past endingBit.
+          if (endingBit - bits_x < step)
+            break;
+        }
+      }
+    };
+
+    Timer timer;
+    // Times one function, logs its statistics and returns its average ns/op.
+    auto measure = [&](Func func, const char *label, const char *name) {
+      timer.start();
+      runner(func);
+      timer.stop();
+      double average = static_cast<double>(timer.nanoseconds()) / N / rounds;
+      log << "-- Function " << label << ": " << name << " --\n";
+      log << "     Total time      : " << timer.nanoseconds() << " ns \n";
+      log << "     Average runtime : " << average << " ns/op \n";
+      log << "     Ops per second  : "
+          << static_cast<uint64_t>(1'000'000'000.0 / average) << " op/s \n";
+      return average;
+    };
+    double a_average = measure(FuncA, "A", name_a);
+    double b_average = measure(FuncB, "B", name_b);
+
+    log << "-- Average ops per second ratio --\n";
+    log << "     A / B  : " << b_average / a_average << " \n";
+  }
+
+  // Writes to logFile a comparison of FuncA and FuncB over three input ranges:
+  // subnormals, all normals, and normals between 2^-10 and 2^10.
+  template <bool binary, typename Func>
+  static void run_perf(Func FuncA, Func FuncB, size_t rounds,
+                       const char *name_a, const char *name_b,
+                       const char *logFile) {
+    std::ofstream log(logFile);
+    auto run = [&](StorageType startingBit, StorageType endingBit) {
+      run_perf_in_range<binary>(FuncA, FuncB, startingBit, endingBit, 1'000'001,
+                                rounds, name_a, name_b, log);
+    };
+    log << " Performance tests with inputs in denormal range:\n";
+    run(StorageType(0), FPBits::max_subnormal().uintval());
+    log << "\n Performance tests with inputs in normal range:\n";
+    run(FPBits::min_normal().uintval(), FPBits::max_normal().uintval());
+    log << "\n Performance tests with inputs in normal range with exponents "
+           "close to each other:\n";
+    run(FPBits(OutputType(0x1.0p-10)).uintval(),
+        FPBits(OutputType(0x1.0p+10)).uintval());
+  }
+};
+
+} // namespace testing
+} // namespace LIBC_NAMESPACE_DECL
+
+// Logs binary FuncA-vs-FuncB timings (1 round) to filename; can be repeated.
+#define BINARY_INPUT_SINGLE_OUTPUT_PERF(OutputType, InputType, FuncA, FuncB,   \
+                                        filename)                              \
+  {                                                                            \
+    using TargetFuncPtr =                                                      \
+        typename LIBC_NAMESPACE::testing::PerfTest<OutputType,                 \
+                                                   InputType>::BinaryFuncPtr;  \
+    LIBC_NAMESPACE::testing::PerfTest<OutputType, InputType>::run_perf<true>(  \
+        static_cast<TargetFuncPtr>(&FuncA),                                    \
+        static_cast<TargetFuncPtr>(&FuncB), 1, #FuncA, #FuncB, filename);      \
+  }
+
+// Logs binary FuncA-vs-FuncB timings (`rounds` rounds) to filename; repeatable.
+#define BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(OutputType, InputType, FuncA,       \
+                                           FuncB, rounds, filename)            \
+  {                                                                            \
+    using TargetFuncPtr =                                                      \
+        typename LIBC_NAMESPACE::testing::PerfTest<OutputType,                 \
+                                                   InputType>::BinaryFuncPtr;  \
+    LIBC_NAMESPACE::testing::PerfTest<OutputType, InputType>::run_perf<true>(  \
+        static_cast<TargetFuncPtr>(&FuncA),                                    \
+        static_cast<TargetFuncPtr>(&FuncB), rounds, #FuncA, #FuncB, filename); \
+  }
+
+// Logs unary FuncA-vs-FuncB timings (1 round) to filename; can be repeated.
+#define SINGLE_INPUT_SINGLE_OUTPUT_PERF(T, FuncA, FuncB, filename)             \
+  {                                                                            \
+    using TargetFuncPtr =                                                      \
+        typename LIBC_NAMESPACE::testing::PerfTest<T, T>::UnaryFuncPtr;        \
+    LIBC_NAMESPACE::testing::PerfTest<T, T>::run_perf<false>(                  \
+        static_cast<TargetFuncPtr>(&FuncA),                                    \
+        static_cast<TargetFuncPtr>(&FuncB), 1, #FuncA, #FuncB, filename);      \
+  }
+
+// Logs unary FuncA-vs-FuncB timings (`rounds` rounds) to filename; repeatable.
+#define SINGLE_INPUT_SINGLE_OUTPUT_PERF_EX(T, FuncA, FuncB, rounds, filename)  \
+  {                                                                            \
+    using TargetFuncPtr =                                                      \
+        typename LIBC_NAMESPACE::testing::PerfTest<T, T>::UnaryFuncPtr;        \
+    LIBC_NAMESPACE::testing::PerfTest<T, T>::run_perf<false>(                  \
+        static_cast<TargetFuncPtr>(&FuncA),                                    \
+        static_cast<TargetFuncPtr>(&FuncB), rounds, #FuncA, #FuncB, filename); \
+  }
diff --git a/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h b/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h
deleted file mode 100644
index a434123a3f296..0000000000000
--- a/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h
+++ /dev/null
@@ -1,105 +0,0 @@
-//===-- Common utility class for differential analysis --------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "src/__support/CPP/algorithm.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/macros/config.h"
-#include "test/src/math/performance_testing/Timer.h"
-
-#include <fstream>
-
-namespace LIBC_NAMESPACE_DECL {
-namespace testing {
-
-template <typename T> class SingleInputSingleOutputPerf {
-  using FPBits = fputil::FPBits<T>;
-  using StorageType = typename FPBits::StorageType;
-  static constexpr StorageType UIntMax =
-      cpp::numeric_limits<StorageType>::max();
-
-public:
-  typedef T Func(T);
-
-  static void runPerfInRange(Func FuncA, Func FuncB, StorageType startingBit,
-                             StorageType endingBit, size_t rounds,
-                             std::ofstream &log) {
-    size_t n = 10'010'001;
-    if (sizeof(StorageType) <= sizeof(size_t))
-      n = cpp::min(n, static_cast<size_t>(endingBit - startingBit));
-
-    auto runner = [=](Func func) {
-      StorageType step = (endingBit - startingBit) / n;
-      if (step == 0)
-        step = 1;
-      [[maybe_unused]] volatile T result;
-      for (size_t i = 0; i < rounds; i++) {
-        for (StorageType bits = startingBit; bits < endingBit; bits += step) {
-          T x = FPBits(bits).get_val();
-          result = func(x);
-        }
-      }
-    };
-
-    Timer timer;
-    timer.start();
-    runner(FuncA);
-    timer.stop();
-
-    double a_average = static_cast<double>(timer.nanoseconds()) / n / rounds;
-    log << "-- Function A --\n";
-    log << "     Total time      : " << timer.nanoseconds() << " ns \n";
-    log << "     Average runtime : " << a_average << " ns/op \n";
-    log << "     Ops per second  : "
-        << static_cast<uint64_t>(1'000'000'000.0 / a_average) << " op/s \n";
-
-    timer.start();
-    runner(FuncB);
-    timer.stop();
-
-    double b_average = static_cast<double>(timer.nanoseconds()) / n / rounds;
-    log << "-- Function B --\n";
-    log << "     Total time      : " << timer.nanoseconds() << " ns \n";
-    log << "     Average runtime : " << b_average << " ns/op \n";
-    log << "     Ops per second  : "
-        << static_cast<uint64_t>(1'000'000'000.0 / b_average) << " op/s \n";
-
-    log << "-- Average ops per second ratio --\n";
-    log << "     A / B  : " << b_average / a_average << " \n";
-  }
-
-  static void runPerf(Func FuncA, Func FuncB, size_t rounds, const char *name_a,
-                      const char *name_b, const char *logFile) {
-    std::ofstream log(logFile);
-    log << "Function A - " << name_a << " Function B - " << name_b << "\n";
-    log << " Performance tests with inputs in denormal range:\n";
-    runPerfInRange(FuncA, FuncB, /* startingBit= */ StorageType(0),
-                   /* endingBit= */ FPBits::max_subnormal().uintval(), rounds,
-                   log);
-    log << "\n Performance tests with inputs in normal range:\n";
-    runPerfInRange(FuncA, FuncB,
-                   /* startingBit= */ FPBits::min_normal().uintval(),
-                   /* endingBit= */ FPBits::max_normal().uintval(), rounds,
-                   log);
-  }
-};
-
-} // namespace testing
-} // namespace LIBC_NAMESPACE_DECL
-
-#define SINGLE_INPUT_SINGLE_OUTPUT_PERF(T, FuncA, FuncB, filename)             \
-  int main() {                                                                 \
-    LIBC_NAMESPACE::testing::SingleInputSingleOutputPerf<T>::runPerf(          \
-        &FuncA, &FuncB, 1, #FuncA, #FuncB, filename);                          \
-    return 0;                                                                  \
-  }
-
-#define SINGLE_INPUT_SINGLE_OUTPUT_PERF_EX(T, FuncA, FuncB, rounds, filename)  \
-  {                                                                            \
-    LIBC_NAMESPACE::testing::SingleInputSingleOutputPerf<T>::runPerf(          \
-        &FuncA, &FuncB, rounds, #FuncA, #FuncB, filename);                     \
-  }
diff --git a/libc/test/src/math/performance_testing/ceilf_perf.cpp b/libc/test/src/math/performance_testing/ceilf_perf.cpp
index 04e96f6fb2dcc..37c5d31a3a393 100644
--- a/libc/test/src/math/performance_testing/ceilf_perf.cpp
+++ b/libc/test/src/math/performance_testing/ceilf_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/ceilf.h"
 
 #include <math.h>
 
-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::ceilf, ::ceilf,
-                                "ceilf_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::ceilf, ::ceilf,
+                                  "ceilf_perf.log")
+  return 0;
+}
diff --git a/libc/test/src/math/performance_testing/cosf_perf.cpp b/libc/test/src/math/performance_testing/cosf_perf.cpp
index 1501b8bf25404..b189c554265e1 100644
--- a/libc/test/src/math/performance_testing/cosf_perf.cpp
+++ b/libc/test/src/math/performance_testing/cosf_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/cosf.h"
 
 #include <math.h>
 
-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::cosf, ::cosf,
-                                "cosf_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::cosf, ::cosf,
+                                  "cosf_perf.log")
+  return 0;
+}
diff --git a/libc/test/src/math/performance_testing/exp10f16_perf.cpp b/libc/test/src/math/performance_testing/exp10f16_perf.cpp
index b9e76d420afce..8d7bb7b6b1aa8 100644
--- a/libc/test/src/math/performance_testing/exp10f16_perf.cpp
+++ b/libc/test/src/math/performance_testing/exp10f16_perf.cpp
@@ -6,8 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/exp10f16.h"
 
 // LLVM libc might be the only libc implementation with support for float16 math
diff --git a/libc/test/src/math/performance_testing/exp2f16_perf.cpp b/libc/test/src/math/performance_testing/exp2f16_perf.cpp
index aa58de2476f1a..c564fa17bb1fe 100644
--- a/libc/test/src/math/performance_testing/exp2f16_perf.cpp
+++ b/libc/test/src/math/performance_testing/exp2f16_perf.cpp
@@ -6,8 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/exp2f16.h"
 
 // LLVM libc might be the only libc implementation with support for float16 math
diff --git a/libc/test/src/math/performance_testing/exp2f_perf.cpp b/libc/test/src/math/performance_testing/exp2f_perf.cpp
index 19a70ac6569aa..fa5a6ad452e94 100644
--- a/libc/test/src/math/performance_testing/exp2f_perf.cpp
+++ b/libc/test/src/math/performance_testing/exp2f_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/exp2f.h"
 
 #include <math.h>
 
-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::exp2f, ::exp2f,
-                                "exp2f_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::exp2f, ::exp2f,
+                                  "exp2f_perf.log")
+  return 0;
+}
diff --git a/libc/test/src/math/performance_testing/expf16_perf.cpp b/libc/test/src/math/performance_testing/expf16_perf.cpp
index bc9d9f05559a3..7fe567c1e3951 100644
--- a/libc/test/src/math/performance_testing/expf16_perf.cpp
+++ b/libc/test/src/math/performance_testing/expf16_perf.cpp
@@ -6,8 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/expf16.h"
 
 // LLVM libc might be the only libc implementation with support for float16 math
diff --git a/libc/test/src/math/performance_testing/expf_perf.cpp b/libc/test/src/math/performance_testing/expf_perf.cpp
index 4b743514023d1..33306d181be20 100644
--- a/libc/test/src/math/performance_testing/expf_perf.cpp
+++ b/libc/test/src/math/performance_testing/expf_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/expf.h"
 
 #include <math.h>
 
-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::expf, ::expf,
-                                "expf_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::expf, ::expf,
+                                  "expf_perf.log")
+  return 0;
+}
diff --git a/libc/test/src/math/performance_testing/expm1f_perf.cpp b/libc/test/src/math/performance_testing/expm1f_perf.cpp
index 128ab351d86db..a64f3039f8a5e 100644
--- a/libc/test/src/math/performance_testing/expm1f_perf.cpp
+++ b/libc/test/src/math/performance_testing/expm1f_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/expm1f.h"
 
 #include <math.h>
 
-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::expm1f, ::expm1f,
-                                "expm1f_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::expm1f, ::expm1f,
+                                  "expm1f_perf.log")
+  return 0;
+}
diff --git a/libc/test/src/math/performance_testing/fabsf_perf.cpp b/libc/test/src/math/performance_testing/fabsf_perf.cpp
index b6c6add75d230..7a8bd1648f81b 100644
--- a/libc/test/src/math/performance_testing/fabsf_perf.cpp
+++ b/libc/test/src/math/performance_testing/fabsf_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/fabsf.h"
 
 #include <math.h>
 
-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::fabsf, ::fabsf,
-                                "fabsf_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::fabsf, ::fabsf,
+                                  "fabsf_perf.log")
+  return 0;
+}
diff --git a/libc/test/src/math/performance_testing/floorf_perf.cpp b/libc/test/src/math/performance_testing/floorf_perf.cpp
index 0f1087b3c8236..85fa7391d0d37 100644
--- a/libc/test/src/math/performance_testing/floorf_perf.cpp
+++ b/libc/test/src/math/performance_testing/floorf_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/floorf.h"
 
 #include <math.h>
 
-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::floorf, ::floorf,
-                                "floorf_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::floorf, ::floorf,
+                                  "floorf_perf.log")
+  return 0;
+}
diff --git a/libc/test/src/math/performance_testing/fmod_perf.cpp b/libc/test/src/math/performance_testing/fmod_perf.cpp
index 75a4242034226..a99a61f6f9054 100644
--- a/libc/test/src/math/performance_testing/fmod_perf.cpp
+++ b/libc/test/src/math/performance_testing/fmod_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "BinaryOpSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/fmod.h"
 
 #include <math.h>
 
-BINARY_OP_SINGLE_OUTPUT_PERF(double, double, LIBC_NAMESPACE::fmod, ::fmod,
-                             "fmod_perf.log")
+int main() {
+  BINARY_INPUT_SINGLE_OUTPUT_PERF(double, double, LIBC_NAMESPACE::fmod, ::fmod,
+                                  "fmod_perf.log")
+  return 0;
+}
diff --git a/libc/test/src/math/performance_testing/fmodf128_perf.cpp b/libc/test/src/math/performance_testing/fmodf128_perf.cpp
index 8165e9254dd56..93212686c2abd 100644
--- a/libc/test/src/math/performance_testing/fmodf128_perf.cpp
+++ b/libc/test/src/math/performance_testing/fmodf128_perf.cpp
@@ -6,11 +6,14 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "BinaryOpSingleOutputDiff.h"
-
+#include "PerfTest.h"
+#include "src/__support/macros/properties/types.h"
 #include "src/math/fmodf128.h"
 
 #include <math.h>
 
-BINARY_OP_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::fmodf128, ::fmodf128,
-                             "fmodf128_perf.log")
+int main() {
+  BINARY_INPUT_SINGLE_OUTPUT_PERF(float128, float128, LIBC_NAMESPACE::fmodf128,
+                                  ::fmodf128, "fmodf128_perf.log")
+  return 0;
+}
diff --git a/libc/test/src/math/performance_testing/fmodf16_perf.cpp b/libc/test/src/math/performance_testing/fmodf16_perf.cpp
index 062bc2da05adf..f7c492cb77796 100644
--- a/libc/test/src/math/performance_testing/fmodf16_perf.cpp
+++ b/libc/test/src/math/performance_testing/fmodf16_perf.cpp
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "BinaryOpSingleOutputPerf.h"
+#include "PerfTest.h"
 
 #include "src/__support/FPUtil/generic/FMod.h"
 #include "src/__support/macros/properties/types.h"
@@ -16,12 +16,12 @@
 #define FMOD_FUNC(U) (LIBC_NAMESPACE::fputil::generic::FMod<float16, U>::eval)
 
 int main() {
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, float16, FMOD_FUNC(uint16_t),
-                                  FMOD_FUNC(uint32_t), 5000,
-                                  "fmodf16_u16_vs_u32_perf.log")
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(float16, float16, FMOD_FUNC(uint16_t),
+                                     FMOD_FUNC(uint32_t), 5000,
+                                     "fmodf16_u16_vs_u32_perf.log")
 
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, float16, FMOD_FUNC(uint16_t),
-                                  FMOD_FUNC(uint64_t), 5000,
-                                  "fmodf16_u16_vs_u64_perf.log")
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(float16, float16, FMOD_FUNC(uint16_t),
+                                     FMOD_FUNC(uint64_t), 5000,
+                                     "fmodf16_u16_vs_u64_perf.log")
   return 0;
 }
diff --git a/libc/test/src/math/performance_testing/fmodf_perf.cpp b/libc/test/src/math/performance_testing/fmodf_perf.cpp
index b4f37ef25e676..27cc7eda6a892 100644
--- a/libc/test/src/math/performance_testing/fmodf_perf.cpp
+++ b/libc/test/src/math/performance_testing/fmodf_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "BinaryOpSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/fmodf.h"
 
 #include <math.h>
 
-BINARY_OP_SINGLE_OUTPUT_PERF(float, float, LIBC_NAMESPACE::fmodf, ::fmodf,
-                             "fmodf_perf.log")
+int main() {
+  BINARY_INPUT_SINGLE_OUTPUT_PERF(float, float, LIBC_NAMESPACE::fmodf, ::fmodf,
+                                  "fmodf_perf.log")
+  return 0;
+}
diff --git a/libc/test/src/math/performance_testing/fmodl_perf.cpp b/libc/test/src/math/performance_testing/fmodl_perf.cpp
index aefdf2d6b42fc..bb5a1d83fa9f1 100644
--- a/libc/test/src/math/performance_testing/fmodl_perf.cpp
+++ b/libc/test/src/math/performance_testing/fmodl_perf.cpp
@@ -6,11 +6,14 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "BinaryOpSingleOutputDiff.h"
-
+#include "PerfTest.h"
 #include "src/math/fmodl.h"
 
 #include <math.h>
 
-BINARY_OP_SINGLE_OUTPUT_PERF(long double, LIBC_NAMESPACE::fmodl, ::fmodl,
-                             "fmodl_perf.log")
+int main() {
+  BINARY_INPUT_SINGLE_OUTPUT_PERF(long double, long double,
+                                  LIBC_NAMESPACE::fmodl, ::fmodl,
+                                  "fmodl_perf.log")
+  return 0;
+}
diff --git a/libc/test/src/math/performance_testing/fmul_perf.cpp b/libc/test/src/math/performance_testing/fmul_perf.cpp
index f15cfafbf2945..0a8dcfea121e8 100644
--- a/libc/test/src/math/performance_testing/fmul_perf.cpp
+++ b/libc/test/src/math/performance_testing/fmul_perf.cpp
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "BinaryOpSingleOutputPerf.h"
+#include "PerfTest.h"
 #include "src/__support/FPUtil/generic/mul.h"
 #include "src/math/fmul.h"
 
@@ -17,8 +17,8 @@ float fmul_placeholder_binary(double x, double y) {
 }
 
 int main() {
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, double, LIBC_NAMESPACE::fmul,
-                                  fmul_placeholder_binary, DOUBLE_ROUNDS,
-                                  "fmul_perf.log")
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(float, double, LIBC_NAMESPACE::fmul,
+                                     fmul_placeholder_binary, DOUBLE_ROUNDS,
+                                     "fmul_perf.log")
   return 0;
 }
diff --git a/libc/test/src/math/performance_testing/fmull_perf.cpp b/libc/test/src/math/performance_testing/fmull_perf.cpp
index 058e10288dbde..16ea375b8ee12 100644
--- a/libc/test/src/math/performance_testing/fmull_perf.cpp
+++ b/libc/test/src/math/performance_testing/fmull_perf.cpp
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "BinaryOpSingleOutputPerf.h"
+#include "PerfTest.h"
 #include "src/math/fmull.h"
 
 static constexpr size_t LONG_DOUBLE_ROUNDS = 40;
@@ -16,8 +16,8 @@ float fmull_placeholder_binary(long double x, long double y) {
 }
 
 int main() {
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, long double, LIBC_NAMESPACE::fmull,
-                                  fmull_placeholder_binary, LONG_DOUBLE_ROUNDS,
-                                  "fmull_perf.log")
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(float, long double, LIBC_NAMESPACE::fmull,
+                                     fmull_placeholder_binary,
+                                     LONG_DOUBLE_ROUNDS, "fmull_perf.log")
   return 0;
 }
diff --git a/libc/test/src/math/performance_testing/hypot_perf.cpp b/libc/test/src/math/performance_testing/hypot_perf.cpp
index 04a493ff0e025..f7a310736d56a 100644
--- a/libc/test/src/math/performance_testing/hypot_perf.cpp
+++ b/libc/test/src/math/performance_testing/hypot_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "BinaryOpSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/hypot.h"
 
 #include <math.h>
 
-BINARY_OP_SINGLE_OUTPUT_PERF(double, double, LIBC_NAMESPACE::hypot, ::hypot,
-                             "hypot_perf.log")
+int main() {
+  BINARY_INPUT_SINGLE_OUTPUT_PERF(double, double, LIBC_NAMESPACE::hypot,
+                                  ::hypot, "hypot_perf.log")
+  return 0;
+}
diff --git a/libc/test/src/math/performance_testing/hypotf16_perf.cpp b/libc/test/src/math/performance_testing/hypotf16_perf.cpp
index b53a9042171a6..883331ad4db18 100644
--- a/libc/test/src/math/performance_testing/hypotf16_perf.cpp
+++ b/libc/test/src/math/performance_testing/hypotf16_perf.cpp
@@ -6,11 +6,14 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "BinaryOpSingleOutputPerf.h"
+#include "PerfTest.h"
 
 #include "src/__support/FPUtil/Hypot.h"
 #include "src/math/hypotf16.h"
 
-BINARY_OP_SINGLE_OUTPUT_PERF(float16, float16, LIBC_NAMESPACE::hypotf16,
-                             LIBC_NAMESPACE::fputil::hypot<float16>,
-                             "hypotf16_perf.log")
+int main() {
+  BINARY_INPUT_SINGLE_OUTPUT_PERF(float16, float16, LIBC_NAMESPACE::hypotf16,
+                                  LIBC_NAMESPACE::fputil::hypot<float16>,
+                                  "hypotf16_perf.log")
+  return 0;
+}
diff --git a/libc/test/src/math/performance_testing/hypotf_perf.cpp b/libc/test/src/math/performance_testing/hypotf_perf.cpp
index 8a42f792263c9..00f22335b70a9 100644
--- a/libc/test/src/math/performance_testing/hypotf_perf.cpp
+++ b/libc/test/src/math/performance_testing/hypotf_perf.cpp
@@ -6,11 +6,14 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "BinaryOpSingleOutputPerf.h"
+#include "PerfTest.h"
 
 #include "src/math/hypotf.h"
 
 #include <math.h>
 
-BINARY_OP_SINGLE_OUTPUT_PERF(float, float, LIBC_NAMESPACE::hypotf, ::hypotf,
-                             "hypotf_perf.log")
+int main() {
+  BINARY_INPUT_SINGLE_OUTPUT_PERF(float, float, LIBC_NAMESPACE::hypotf,
+                                  ::hypotf, "hypotf_perf.log")
+  return 0;
+}
diff --git a/libc/test/src/math/performance_testing/log10f_perf.cpp b/libc/test/src/math/performance_testing/log10f_perf.cpp
index 32a31b9325285..87e191e72d3a6 100644
--- a/libc/test/src/math/performance_testing/log10f_perf.cpp
+++ b/libc/test/src/math/performance_testing/log10f_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/log10f.h"
 
 #include <math.h>
 
-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::log10f, ::log10f,
-                                "log10f_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::log10f, ::log10f,
+                                  "log10f_perf.log")
+  return 0;
+}
diff --git a/libc/test/src/math/performance_testing/log1pf_perf.cpp b/libc/test/src/math/performance_testing/log1pf_perf.cpp
index 18c168423b87d..2484b03efc103 100644
--- a/libc/test/src/math/performance_testing/log1pf_perf.cpp
+++ b/libc/test/src/math/performance_testing/log1pf_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/log1pf.h"
 
 #include <math.h>
 
-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::log1pf, ::log1pf,
-                                "log1pf_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::log1pf, ::log1pf,
+                                  "log1pf_perf.log")
+  return 0;
+}
diff --git a/libc/test/src/math/performance_testing/log2f_perf.cpp b/libc/test/src/math/performance_testing/log2f_perf.cpp
index c4c4dbf4d9f55..9d0e6e5858d37 100644
--- a/libc/test/src/math/performance_testing/log2f_perf.cpp
+++ b/libc/test/src/math/performance_testing/log2f_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/log2f.h"
 
 #include <math.h>
 
-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::log2f, ::log2f,
-                                "log2f_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::log2f, ::log2f,
+                                  "log2f_perf.log")
+  return 0;
+}
diff --git a/libc/test/src/math/performance_testing/logbf_perf.cpp b/libc/test/src/math/performance_testing/logbf_perf.cpp
index eefd64b8ae913..b5e6d1ffeb609 100644
--- a/libc/test/src/math/performance_testing/logbf_perf.cpp
+++ b/libc/test/src/math/performance_testing/logbf_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/logbf.h"
 
 #include <math.h>
 
-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::logbf, ::logbf,
-                                "logbf_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::logbf, ::logbf,
+                                  "logbf_perf.log")
+  return 0;
+}
diff --git a/libc/test/src/math/performance_testing/logf_perf.cpp b/libc/test/src/math/performance_testing/logf_perf.cpp
index 53f4f50e09efe..b078d9f478442 100644
--- a/libc/test/src/math/performance_testing/logf_perf.cpp
+++ b/libc/test/src/math/performance_testing/logf_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/logf.h"
 
 #include <math.h>
 
-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::logf, ::logf,
-                                "logf_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::logf, ::logf,
+                                  "logf_perf.log")
+  return 0;
+}
diff --git a/libc/test/src/math/performance_testing/max_min_funcs_perf.cpp b/libc/test/src/math/performance_testing/max_min_funcs_perf.cpp
index b77268d107c58..7bf9a8621f2b7 100644
--- a/libc/test/src/math/performance_testing/max_min_funcs_perf.cpp
+++ b/libc/test/src/math/performance_testing/max_min_funcs_perf.cpp
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "BinaryOpSingleOutputPerf.h"
+#include "PerfTest.h"
 #include "src/math/fmaxf.h"
 #include "src/math/fmaxf16.h"
 #include "src/math/fmaximum_numf.h"
@@ -35,41 +35,40 @@ float16 placeholder_binaryf16(float16 x, float16 y) { return x; }
 float placeholder_binaryf(float x, float y) { return x; }
 
 int main() {
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, float16, LIBC_NAMESPACE::fmaxf16,
-                                  placeholder_binaryf16, FLOAT16_ROUNDS,
-                                  "fmaxf16_perf.log")
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, float16, LIBC_NAMESPACE::fminf16,
-                                  placeholder_binaryf16, FLOAT16_ROUNDS,
-                                  "fminf16_perf.log")
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, float16, LIBC_NAMESPACE::fmaximumf16,
-                                  placeholder_binaryf16, FLOAT16_ROUNDS,
-                                  "fmaximumf16_perf.log")
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, float16, LIBC_NAMESPACE::fminimumf16,
-                                  placeholder_binaryf16, FLOAT16_ROUNDS,
-                                  "fminimumf16_perf.log")
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(float16, float16, LIBC_NAMESPACE::fmaxf16,
+                                     placeholder_binaryf16, FLOAT16_ROUNDS,
+                                     "fmaxf16_perf.log")
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(float16, float16, LIBC_NAMESPACE::fminf16,
+                                     placeholder_binaryf16, FLOAT16_ROUNDS,
+                                     "fminf16_perf.log")
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(
+      float16, float16, LIBC_NAMESPACE::fmaximumf16, placeholder_binaryf16,
+      FLOAT16_ROUNDS, "fmaximumf16_perf.log")
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(
+      float16, float16, LIBC_NAMESPACE::fminimumf16, placeholder_binaryf16,
+      FLOAT16_ROUNDS, "fminimumf16_perf.log")
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(
       float16, float16, LIBC_NAMESPACE::fmaximum_numf16, placeholder_binaryf16,
       FLOAT16_ROUNDS, "fmaximum_numf16_perf.log")
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(
       float16, float16, LIBC_NAMESPACE::fminimum_numf16, placeholder_binaryf16,
       FLOAT16_ROUNDS, "fminimum_numf16_perf.log")
 
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, float, LIBC_NAMESPACE::fmaxf, ::fmaxf,
-                                  FLOAT_ROUNDS, "fmaxf_perf.log")
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, float, LIBC_NAMESPACE::fminf, ::fminf,
-                                  FLOAT_ROUNDS, "fminf_perf.log")
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, float, LIBC_NAMESPACE::fmaximumf,
-                                  placeholder_binaryf, FLOAT_ROUNDS,
-                                  "fmaximumf_perf.log")
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, float, LIBC_NAMESPACE::fminimumf,
-                                  placeholder_binaryf, FLOAT_ROUNDS,
-                                  "fminimumf_perf.log")
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, float, LIBC_NAMESPACE::fmaximum_numf,
-                                  placeholder_binaryf, FLOAT_ROUNDS,
-                                  "fmaximum_numf_perf.log")
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, float, LIBC_NAMESPACE::fminimum_numf,
-                                  placeholder_binaryf, FLOAT_ROUNDS,
-                                  "fminimum_numf_perf.log")
-
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(float, float, LIBC_NAMESPACE::fmaxf,
+                                     ::fmaxf, FLOAT_ROUNDS, "fmaxf_perf.log")
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(float, float, LIBC_NAMESPACE::fminf,
+                                     ::fminf, FLOAT_ROUNDS, "fminf_perf.log")
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(float, float, LIBC_NAMESPACE::fmaximumf,
+                                     placeholder_binaryf, FLOAT_ROUNDS,
+                                     "fmaximumf_perf.log")
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(float, float, LIBC_NAMESPACE::fminimumf,
+                                     placeholder_binaryf, FLOAT_ROUNDS,
+                                     "fminimumf_perf.log")
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(
+      float, float, LIBC_NAMESPACE::fmaximum_numf, placeholder_binaryf,
+      FLOAT_ROUNDS, "fmaximum_numf_perf.log")
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(
+      float, float, LIBC_NAMESPACE::fminimum_numf, placeholder_binaryf,
+      FLOAT_ROUNDS, "fminimum_numf_perf.log")
   return 0;
 }
diff --git a/libc/test/src/math/performance_testing/misc_basic_ops_perf.cpp b/libc/test/src/math/performance_testing/misc_basic_ops_perf.cpp
index 9a4522c307ac7..6f7864e5eabae 100644
--- a/libc/test/src/math/performance_testing/misc_basic_ops_perf.cpp
+++ b/libc/test/src/math/performance_testing/misc_basic_ops_perf.cpp
@@ -6,8 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "BinaryOpSingleOutputPerf.h"
-#include "SingleInputSingleOutputPerf.h"
+#include "PerfTest.h"
 #include "src/math/copysignf.h"
 #include "src/math/copysignf16.h"
 #include "src/math/fabsf.h"
@@ -28,14 +27,15 @@ int main() {
   SINGLE_INPUT_SINGLE_OUTPUT_PERF_EX(float16, LIBC_NAMESPACE::fabsf16,
                                      placeholder_unaryf16, FLOAT16_ROUNDS,
                                      "fabsf16_perf.log")
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, float16, LIBC_NAMESPACE::copysignf16,
-                                  placeholder_binaryf16, FLOAT16_ROUNDS,
-                                  "copysignf16_perf.log")
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(
+      float16, float16, LIBC_NAMESPACE::copysignf16, placeholder_binaryf16,
+      FLOAT16_ROUNDS, "copysignf16_perf.log")
 
   SINGLE_INPUT_SINGLE_OUTPUT_PERF_EX(float, LIBC_NAMESPACE::fabsf, fabsf,
                                      FLOAT_ROUNDS, "fabsf_perf.log")
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, float, LIBC_NAMESPACE::copysignf,
-                                  copysignf, FLOAT_ROUNDS, "copysignf_perf.log")
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(float, float, LIBC_NAMESPACE::copysignf,
+                                     copysignf, FLOAT_ROUNDS,
+                                     "copysignf_perf.log")
 
   return 0;
 }
diff --git a/libc/test/src/math/performance_testing/nearbyintf_perf.cpp b/libc/test/src/math/performance_testing/nearbyintf_perf.cpp
index ae708dd213243..3fa844dfbed96 100644
--- a/libc/test/src/math/performance_testing/nearbyintf_perf.cpp
+++ b/libc/test/src/math/performance_testing/nearbyintf_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/nearbyintf.h"
 
 #include <math.h>
 
-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::nearbyintf, ::nearbyintf,
-                                "nearbyintf_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::nearbyintf,
+                                  ::nearbyintf, "nearbyintf_perf.log")
+  return 0;
+}
diff --git a/libc/test/src/math/performance_testing/nearest_integer_funcs_perf.cpp b/libc/test/src/math/performance_testing/nearest_integer_funcs_perf.cpp
index b7bd6636a72e1..fa5f80fdba9f0 100644
--- a/libc/test/src/math/performance_testing/nearest_integer_funcs_perf.cpp
+++ b/libc/test/src/math/performance_testing/nearest_integer_funcs_perf.cpp
@@ -40,7 +40,8 @@ template <typename T> class NearestIntegerPerf {
   static void run_perf_in_range(Func my_func, Func other_func,
                                 StorageType starting_bit,
                                 StorageType ending_bit, StorageType step,
-                                size_t rounds, std::ofstream &log) {
+                                size_t rounds, const char *name_a,
+                                const char *name_b, std::ofstream &log) {
     auto runner = [=](Func func) {
       [[maybe_unused]] volatile T result;
       for (size_t i = 0; i < rounds; i++) {
@@ -60,7 +61,7 @@ template <typename T> class NearestIntegerPerf {
     size_t number_of_runs = (ending_bit - starting_bit) / step + 1;
     double my_average =
         static_cast<double>(timer.nanoseconds()) / number_of_runs / rounds;
-    log << "-- My function --\n";
+    log << "-- Function A: " << name_a << " --\n";
     log << "     Total time      : " << timer.nanoseconds() << " ns \n";
     log << "     Average runtime : " << my_average << " ns/op \n";
     log << "     Ops per second  : "
@@ -72,17 +73,18 @@ template <typename T> class NearestIntegerPerf {
 
     double other_average =
         static_cast<double>(timer.nanoseconds()) / number_of_runs / rounds;
-    log << "-- Other function --\n";
+    log << "-- Function B: " << name_b << " --\n";
     log << "     Total time      : " << timer.nanoseconds() << " ns \n";
     log << "     Average runtime : " << other_average << " ns/op \n";
     log << "     Ops per second  : "
         << static_cast<uint64_t>(1'000'000'000.0 / other_average) << " op/s \n";
 
-    log << "-- Average runtime ratio --\n";
-    log << "     Mine / Other's  : " << my_average / other_average << " \n";
+    log << "-- Average ops per second ratio --\n";
+    log << "     A / B  : " << other_average / my_average << " \n";
   }
 
   static void run_perf(Func my_func, Func other_func, size_t rounds,
+                       const char *name_a, const char *name_b,
                        const char *log_file) {
     std::ofstream log(log_file);
     log << "Performance tests with inputs in normal integral range:\n";
@@ -93,14 +95,14 @@ template <typename T> class NearestIntegerPerf {
         StorageType((FPBits::EXP_BIAS + FPBits::FRACTION_LEN - 1)
                     << FPBits::SIG_LEN),
         /*step=*/StorageType(1 << FPBits::SIG_LEN),
-        rounds * FPBits::EXP_BIAS * FPBits::EXP_BIAS * 2, log);
+        rounds * FPBits::EXP_BIAS * FPBits::EXP_BIAS * 2, name_a, name_b, log);
     log << "\n Performance tests with inputs in low integral range:\n";
     run_perf_in_range(
         my_func, other_func,
         /*starting_bit=*/StorageType(1 << FPBits::SIG_LEN),
         /*ending_bit=*/StorageType((FPBits::EXP_BIAS - 1) << FPBits::SIG_LEN),
         /*step_bit=*/StorageType(1 << FPBits::SIG_LEN),
-        rounds * FPBits::EXP_BIAS * FPBits::EXP_BIAS * 2, log);
+        rounds * FPBits::EXP_BIAS * FPBits::EXP_BIAS * 2, name_a, name_b, log);
     log << "\n Performance tests with inputs in high integral range:\n";
     run_perf_in_range(
         my_func, other_func,
@@ -110,7 +112,7 @@ template <typename T> class NearestIntegerPerf {
         /*ending_bit=*/
         StorageType(FPBits::MAX_BIASED_EXPONENT << FPBits::SIG_LEN),
         /*step=*/StorageType(1 << FPBits::SIG_LEN),
-        rounds * FPBits::EXP_BIAS * FPBits::EXP_BIAS * 2, log);
+        rounds * FPBits::EXP_BIAS * FPBits::EXP_BIAS * 2, name_a, name_b, log);
     log << "\n Performance tests with inputs in normal fractional range:\n";
     run_perf_in_range(
         my_func, other_func,
@@ -118,11 +120,11 @@ template <typename T> class NearestIntegerPerf {
         StorageType(((FPBits::EXP_BIAS + 1) << FPBits::SIG_LEN) + 1),
         /*ending_bit=*/
         StorageType(((FPBits::EXP_BIAS + 2) << FPBits::SIG_LEN) - 1),
-        /*step=*/StorageType(1), rounds * 2, log);
+        /*step=*/StorageType(1), rounds * 2, name_a, name_b, log);
     log << "\n Performance tests with inputs in subnormal fractional range:\n";
     run_perf_in_range(my_func, other_func, /*starting_bit=*/StorageType(1),
                       /*ending_bit=*/StorageType(FPBits::SIG_MASK),
-                      /*step=*/StorageType(1), rounds, log);
+                      /*step=*/StorageType(1), rounds, name_a, name_b, log);
   }
 };
 
@@ -131,9 +133,7 @@ template <typename T> class NearestIntegerPerf {
 #define NEAREST_INTEGER_PERF(T, my_func, other_func, rounds, filename)         \
   {                                                                            \
     LIBC_NAMESPACE::testing::NearestIntegerPerf<T>::run_perf(                  \
-        &my_func, &other_func, rounds, filename);                              \
-    LIBC_NAMESPACE::testing::NearestIntegerPerf<T>::run_perf(                  \
-        &my_func, &other_func, rounds, filename);                              \
+        &my_func, &other_func, rounds, #my_func, #other_func, filename);       \
   }
 
 static constexpr size_t FLOAT16_ROUNDS = 20'000;
diff --git a/libc/test/src/math/performance_testing/rintf_perf.cpp b/libc/test/src/math/performance_testing/rintf_perf.cpp
index 6347ac9149af6..f54b19c4dd157 100644
--- a/libc/test/src/math/performance_testing/rintf_perf.cpp
+++ b/libc/test/src/math/performance_testing/rintf_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/rintf.h"
 
 #include <math.h>
 
-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::rintf, ::rintf,
-                                "rintf_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::rintf, ::rintf,
+                                  "rintf_perf.log")
+  return 0;
+}
diff --git a/libc/test/src/math/performance_testing/roundf_perf.cpp b/libc/test/src/math/performance_testing/roundf_perf.cpp
index 36becacba07cb..fb2a6309c89ed 100644
--- a/libc/test/src/math/performance_testing/roundf_perf.cpp
+++ b/libc/test/src/math/performance_testing/roundf_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/roundf.h"
 
 #include <math.h>
 
-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::roundf, ::roundf,
-                                "roundf_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::roundf, ::roundf,
+                                  "roundf_perf.log")
+  return 0;
+}
diff --git a/libc/test/src/math/performance_testing/sinf_perf.cpp b/libc/test/src/math/performance_testing/sinf_perf.cpp
index 43ba60e1ef76a..e12a4b1dbb3ff 100644
--- a/libc/test/src/math/performance_testing/sinf_perf.cpp
+++ b/libc/test/src/math/performance_testing/sinf_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/sinf.h"
 
 #include <math.h>
 
-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::sinf, ::sinf,
-                                "sinf_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::sinf, ::sinf,
+                                  "sinf_perf.log")
+  return 0;
+}
diff --git a/libc/test/src/math/performance_testing/sqrtf128_perf.cpp b/libc/test/src/math/performance_testing/sqrtf128_perf.cpp
index bc04e698b2439..e6a30aff7d463 100644
--- a/libc/test/src/math/performance_testing/sqrtf128_perf.cpp
+++ b/libc/test/src/math/performance_testing/sqrtf128_perf.cpp
@@ -7,8 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/__support/FPUtil/sqrt.h"
 #include "src/math/sqrtf128.h"
 
@@ -16,5 +15,8 @@ float128 sqrtf128_placeholder(float128 x) {
   return LIBC_NAMESPACE::fputil::sqrt<float128>(x);
 }
 
-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float128, LIBC_NAMESPACE::sqrtf128,
-                                ::sqrtf128_placeholder, "sqrtf128_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float128, LIBC_NAMESPACE::sqrtf128,
+                                  ::sqrtf128_placeholder, "sqrtf128_perf.log")
+  return 0;
+}
diff --git a/libc/test/src/math/performance_testing/sqrtf_perf.cpp b/libc/test/src/math/performance_testing/sqrtf_perf.cpp
index 71325518533b6..a244e3c567567 100644
--- a/libc/test/src/math/performance_testing/sqrtf_perf.cpp
+++ b/libc/test/src/math/performance_testing/sqrtf_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/sqrtf.h"
 
 #include <math.h>
 
-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::sqrtf, ::sqrtf,
-                                "sqrtf_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::sqrtf, ::sqrtf,
+                                  "sqrtf_perf.log")
+  return 0;
+}
diff --git a/libc/test/src/math/performance_testing/truncf_perf.cpp b/libc/test/src/math/performance_testing/truncf_perf.cpp
index ff74c6b4eb64d..11c7d23b92800 100644
--- a/libc/test/src/math/performance_testing/truncf_perf.cpp
+++ b/libc/test/src/math/performance_testing/truncf_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/truncf.h"
 
 #include <math.h>
 
-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::truncf, ::truncf,
-                                "truncf_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::truncf, ::truncf,
+                                  "truncf_perf.log")
+  return 0;
+}