[libc-commits] [libc] [libc][math][c23] add performance tests for fmul (PR #106262)

Tue Aug 27 11:14:33 PDT 2024

https://github.com/Jobhdez created https://github.com/llvm/llvm-project/pull/106262

None

>From 05ef61e92a112357246ee94df5739780e29e9a75 Mon Sep 17 00:00:00 2001
From: Job Hernandez <hj93 at protonmail.com>
Date: Tue, 27 Aug 2024 11:13:11 -0700
Subject: [PATCH 1/2] add performance tests for fmul

---
 .../BinaryOpSingleOutputPerf.h                | 115 ++++++++++++++++++
 .../math/performance_testing/CMakeLists.txt   |  12 ++
 .../math/performance_testing/fmul_perf.cpp    |  34 ++++++
 3 files changed, 161 insertions(+)
 create mode 100644 libc/test/src/math/performance_testing/fmul_perf.cpp

diff --git a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
index 63d9768e21899b..7e804c5b3c576b 100644
--- a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
+++ b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
@@ -122,6 +122,112 @@ template <typename T> class BinaryOpSingleOutputPerf {
   }
 };
 
+template <typename T, typename R>
+class BinaryOpSingleDifferentTypeOutputPerf {
+  using FPBits = fputil::FPBits<T>;
+  using StorageType = typename FPBits::StorageType;
+  static constexpr StorageType UIntMax = cpp::numeric_limits<StorageType>::max();
+
+public:
+  typedef T Func(R, R);
+
+  static void run_perf_in_range(Func myFunc, Func otherFunc,
+                                StorageType startingBit, StorageType endingBit,
+                                size_t N, size_t rounds, std::ofstream &log) {
+    if (sizeof(StorageType) <= sizeof(size_t))
+      N = cpp::min(N, static_cast<size_t>(endingBit - startingBit));
+
+    auto runner = [=](Func func) {
+      [[maybe_unused]] volatile T result;
+      if (endingBit < startingBit) {
+        return;
+      }
+
+      StorageType step = (endingBit - startingBit) / N;
+      for (size_t i = 0; i < rounds; i++) {
+        for (StorageType bitsX = startingBit, bitsY = endingBit;;
+             bitsX += step, bitsY -= step) {
+          R x = FPBits(bitsX).get_val();
+          R y = FPBits(bitsY).get_val();
+          result = func(x, y);
+          if (endingBit - bitsX < step) {
+            break;
+          }
+        }
+      }
+    };
+
+    Timer timer;
+    timer.start();
+    runner(myFunc);
+    timer.stop();
+
+    double my_average = static_cast<double>(timer.nanoseconds()) / N / rounds;
+    log << "-- My function --\n";
+    log << "     Total time      : " << timer.nanoseconds() << " ns \n";
+    log << "     Average runtime : " << my_average << " ns/op \n";
+    log << "     Ops per second  : "
+        << static_cast<uint64_t>(1'000'000'000.0 / my_average) << " op/s \n";
+
+    timer.start();
+    runner(otherFunc);
+    timer.stop();
+
+    double other_average =
+        static_cast<double>(timer.nanoseconds()) / N / rounds;
+    log << "-- Other function --\n";
+    log << "     Total time      : " << timer.nanoseconds() << " ns \n";
+    log << "     Average runtime : " << other_average << " ns/op \n";
+    log << "     Ops per second  : "
+        << static_cast<uint64_t>(1'000'000'000.0 / other_average) << " op/s \n";
+
+    log << "-- Average runtime ratio --\n";
+    log << "     Mine / Other's  : " << my_average / other_average << " \n";
+  }
+
+  static void run_perf(Func myFunc, Func otherFunc, int rounds,
+                       const char *logFile) {
+    std::ofstream log(logFile);
+    log << " Performance tests with inputs in denormal range:\n";
+    run_perf_in_range(myFunc, otherFunc, /* startingBit= */ StorageType(0),
+                      /* endingBit= */ FPBits::max_subnormal().uintval(),
+                      1'000'001, rounds, log);
+    log << "\n Performance tests with inputs in normal range:\n";
+    run_perf_in_range(myFunc, otherFunc,
+                      /* startingBit= */ FPBits::min_normal().uintval(),
+                      /* endingBit= */ FPBits::max_normal().uintval(),
+                      1'000'001, rounds, log);
+    log << "\n Performance tests with inputs in normal range with exponents "
+           "close to each other:\n";
+    run_perf_in_range(myFunc, otherFunc,
+                      /* startingBit= */ FPBits(T(0x1.0p-10)).uintval(),
+                      /* endingBit= */ FPBits(T(0x1.0p+10)).uintval(),
+                      1'000'001, rounds, log);
+  }
+
+  static void run_diff(Func myFunc, Func otherFunc, const char *logFile) {
+    uint64_t diffCount = 0;
+    std::ofstream log(logFile);
+    log << " Diff tests with inputs in denormal range:\n";
+    diffCount += run_diff_in_range(
+        myFunc, otherFunc, /* startingBit= */ StorageType(0),
+        /* endingBit= */ FPBits::max_subnormal().uintval(), 1'000'001, log);
+    log << "\n Diff tests with inputs in normal range:\n";
+    diffCount += run_diff_in_range(
+        myFunc, otherFunc,
+        /* startingBit= */ FPBits::min_normal().uintval(),
+        /* endingBit= */ FPBits::max_normal().uintval(), 100'000'001, log);
+    log << "\n Diff tests with inputs in normal range with exponents "
+           "close to each other:\n";
+    diffCount += run_diff_in_range(
+        myFunc, otherFunc, /* startingBit= */ FPBits(T(0x1.0p-10)).uintval(),
+        /* endingBit= */ FPBits(T(0x1.0p+10)).uintval(), 10'000'001, log);
+
+    log << "Total number of differing results: " << diffCount << '\n';
+  }
+};
+
+
 } // namespace testing
 } // namespace LIBC_NAMESPACE_DECL
 
@@ -140,3 +246,12 @@ template <typename T> class BinaryOpSingleOutputPerf {
     LIBC_NAMESPACE::testing::BinaryOpSingleOutputPerf<T>::run_perf(            \
         &myFunc, &otherFunc, rounds, filename);                                \
   }
+
+#define BINARY_OP_SINGLE_DIFFERENT_TYPE_OUTPUT_PERF_EX(T, R, myFunc, otherFunc, rounds, filename) \
+                                                                                             \
+  {									                     \
+    LIBC_NAMESPACE::testing::BinaryOpSingleDifferentTypeOutputPerf<T,R>::run_perf(           \
+        &myFunc, &otherFunc, rounds, filename);                                              \
+     LIBC_NAMESPACE::testing::BinaryOpSingleDifferentTypeOutputPerf<T,R>::run_perf(          \
+        &myFunc, &otherFunc, rounds, filename);                                              \
+   }
diff --git a/libc/test/src/math/performance_testing/CMakeLists.txt b/libc/test/src/math/performance_testing/CMakeLists.txt
index 8e529ca09ed797..207a463fe185f8 100644
--- a/libc/test/src/math/performance_testing/CMakeLists.txt
+++ b/libc/test/src/math/performance_testing/CMakeLists.txt
@@ -476,3 +476,15 @@ add_perf_binary(
   COMPILE_OPTIONS
     -fno-builtin
 )
+
+add_perf_binary(
+  fmul_perf
+  SRCS
+    fmul_perf.cpp
+  DEPENDS
+    .binary_op_single_output_diff
+    libc.src.math.fmul
+    libc.src.math.fmull
+  COMPILE_OPTIONS
+    -fno-builtin
+)
diff --git a/libc/test/src/math/performance_testing/fmul_perf.cpp b/libc/test/src/math/performance_testing/fmul_perf.cpp
new file mode 100644
index 00000000000000..1d242fd3d19647
--- /dev/null
+++ b/libc/test/src/math/performance_testing/fmul_perf.cpp
@@ -0,0 +1,34 @@
+//===-- Performance test for fmul and fmull functions ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "BinaryOpSingleOutputPerf.h"
+#include "src/math/fmul.h"
+#include "src/math/fmull.h"
+#include <math.h>
+
+static constexpr size_t DOUBLE_ROUNDS = 40;
+static constexpr size_t LONG_DOUBLE_ROUNDS = 40;
+
+float fmul_placeholder_binary(double x, double y) {
+  return static_cast<float>(x*y);
+}
+
+float fmull_placeholder_binary(long double x, long double y) {
+  return static_cast<float>(x*y);
+}
+
+int main() {
+  BINARY_OP_SINGLE_DIFFERENT_TYPE_OUTPUT_PERF_EX(float, double, LIBC_NAMESPACE::fmul,
+				  fmul_placeholder_binary, DOUBLE_ROUNDS,
+				  "fmul_perf.log")
+  BINARY_OP_SINGLE_DIFFERENT_TYPE_OUTPUT_PERF_EX(float, long double, LIBC_NAMESPACE::fmull,
+				  fmull_placeholder_binary, LONG_DOUBLE_ROUNDS,
+				  "fmull_perf.log")
+  return 0;
+}
+    

>From b7614b3d6129770d0a79d25931b171679d4772eb Mon Sep 17 00:00:00 2001
From: Job Hernandez <hj93 at protonmail.com>
Date: Tue, 27 Aug 2024 11:13:28 -0700
Subject: [PATCH 2/2] format code

---
 .../BinaryOpSingleOutputPerf.h                | 24 +++++++++----------
 .../math/performance_testing/fmul_perf.cpp    | 17 +++++++------
 2 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
index 7e804c5b3c576b..5941aa71689a59 100644
--- a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
+++ b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
@@ -122,11 +122,11 @@ template <typename T> class BinaryOpSingleOutputPerf {
   }
 };
 
-template <typename T, typename R>
-class BinaryOpSingleDifferentTypeOutputPerf {
+template <typename T, typename R> class BinaryOpSingleDifferentTypeOutputPerf {
   using FPBits = fputil::FPBits<T>;
   using StorageType = typename FPBits::StorageType;
-  static constexpr StorageType UIntMax = cpp::numeric_limits<StorageType>::max();
+  static constexpr StorageType UIntMax =
+      cpp::numeric_limits<StorageType>::max();
 
 public:
   typedef T Func(R, R);
@@ -227,7 +227,6 @@ class BinaryOpSingleDifferentTypeOutputPerf {
   }
 };
 
-
 } // namespace testing
 } // namespace LIBC_NAMESPACE_DECL
 
@@ -247,11 +246,12 @@ class BinaryOpSingleDifferentTypeOutputPerf {
         &myFunc, &otherFunc, rounds, filename);                                \
   }
 
-#define BINARY_OP_SINGLE_DIFFERENT_TYPE_OUTPUT_PERF_EX(T, R, myFunc, otherFunc, rounds, filename) \
-                                                                                             \
-  {									                     \
-    LIBC_NAMESPACE::testing::BinaryOpSingleDifferentTypeOutputPerf<T,R>::run_perf(           \
-        &myFunc, &otherFunc, rounds, filename);                                              \
-     LIBC_NAMESPACE::testing::BinaryOpSingleDifferentTypeOutputPerf<T,R>::run_perf(          \
-        &myFunc, &otherFunc, rounds, filename);                                              \
-   }
+#define BINARY_OP_SINGLE_DIFFERENT_TYPE_OUTPUT_PERF_EX(                        \
+    T, R, myFunc, otherFunc, rounds, filename)                                 \
+                                                                               \
+  {                                                                            \
+    LIBC_NAMESPACE::testing::BinaryOpSingleDifferentTypeOutputPerf<            \
+        T, R>::run_perf(&myFunc, &otherFunc, rounds, filename);                \
+    LIBC_NAMESPACE::testing::BinaryOpSingleDifferentTypeOutputPerf<            \
+        T, R>::run_perf(&myFunc, &otherFunc, rounds, filename);                \
+  }
diff --git a/libc/test/src/math/performance_testing/fmul_perf.cpp b/libc/test/src/math/performance_testing/fmul_perf.cpp
index 1d242fd3d19647..9e692b1848602e 100644
--- a/libc/test/src/math/performance_testing/fmul_perf.cpp
+++ b/libc/test/src/math/performance_testing/fmul_perf.cpp
@@ -15,20 +15,19 @@ static constexpr size_t DOUBLE_ROUNDS = 40;
 static constexpr size_t LONG_DOUBLE_ROUNDS = 40;
 
 float fmul_placeholder_binary(double x, double y) {
-  return static_cast<float>(x*y);
+  return static_cast<float>(x * y);
 }
 
 float fmull_placeholder_binary(long double x, long double y) {
-  return static_cast<float>(x*y);
+  return static_cast<float>(x * y);
 }
 
 int main() {
-  BINARY_OP_SINGLE_DIFFERENT_TYPE_OUTPUT_PERF_EX(float, double, LIBC_NAMESPACE::fmul,
-				  fmul_placeholder_binary, DOUBLE_ROUNDS,
-				  "fmul_perf.log")
-  BINARY_OP_SINGLE_DIFFERENT_TYPE_OUTPUT_PERF_EX(float, long double, LIBC_NAMESPACE::fmull,
-				  fmull_placeholder_binary, LONG_DOUBLE_ROUNDS,
-				  "fmull_perf.log")
+  BINARY_OP_SINGLE_DIFFERENT_TYPE_OUTPUT_PERF_EX(
+      float, double, LIBC_NAMESPACE::fmul, fmul_placeholder_binary,
+      DOUBLE_ROUNDS, "fmul_perf.log")
+  BINARY_OP_SINGLE_DIFFERENT_TYPE_OUTPUT_PERF_EX(
+      float, long double, LIBC_NAMESPACE::fmull, fmull_placeholder_binary,
+      LONG_DOUBLE_ROUNDS, "fmull_perf.log")
   return 0;
 }
-