[libc-commits] [libc] [libc][math] Optimize generic nearest integer functions (PR #98483)
via libc-commits
libc-commits at lists.llvm.org
Thu Jul 11 07:48:16 PDT 2024
https://github.com/overmighty updated https://github.com/llvm/llvm-project/pull/98483
>From 781700f079896b920f373e3fae2ed3a82359f85a Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Thu, 11 Jul 2024 14:06:38 +0200
Subject: [PATCH] [libc][math] Optimize generic nearest integer functions
---
.../FPUtil/NearestIntegerOperations.h | 37 ++--
.../math/performance_testing/CMakeLists.txt | 19 ++
.../nearest_integer_funcs_perf.cpp | 168 ++++++++++++++++++
3 files changed, 208 insertions(+), 16 deletions(-)
create mode 100644 libc/test/src/math/performance_testing/nearest_integer_funcs_perf.cpp
diff --git a/libc/src/__support/FPUtil/NearestIntegerOperations.h b/libc/src/__support/FPUtil/NearestIntegerOperations.h
index cff32938229d0..a9a0a97eebb5c 100644
--- a/libc/src/__support/FPUtil/NearestIntegerOperations.h
+++ b/libc/src/__support/FPUtil/NearestIntegerOperations.h
@@ -75,15 +75,17 @@ LIBC_INLINE T ceil(T x) {
}
uint32_t trim_size = FPBits<T>::FRACTION_LEN - exponent;
- StorageType trunc_mantissa =
- static_cast<StorageType>((bits.get_mantissa() >> trim_size) << trim_size);
- bits.set_mantissa(trunc_mantissa);
- T trunc_value = bits.get_val();
+ StorageType x_u = bits.uintval();
+ StorageType trunc_u =
+ static_cast<StorageType>((x_u >> trim_size) << trim_size);
// If x is already an integer, return it.
- if (trunc_value == x)
+ if (trunc_u == x_u)
return x;
+ bits.set_uintval(trunc_u);
+ T trunc_value = bits.get_val();
+
// If x is negative, the ceil operation is equivalent to the trunc operation.
if (is_neg)
return trunc_value;
@@ -130,15 +132,17 @@ LIBC_INLINE T round(T x) {
uint32_t trim_size = FPBits<T>::FRACTION_LEN - exponent;
bool half_bit_set =
bool(bits.get_mantissa() & (StorageType(1) << (trim_size - 1)));
- StorageType trunc_mantissa =
- static_cast<StorageType>((bits.get_mantissa() >> trim_size) << trim_size);
- bits.set_mantissa(trunc_mantissa);
- T trunc_value = bits.get_val();
+ StorageType x_u = bits.uintval();
+ StorageType trunc_u =
+ static_cast<StorageType>((x_u >> trim_size) << trim_size);
// If x is already an integer, return it.
- if (trunc_value == x)
+ if (trunc_u == x_u)
return x;
+ bits.set_uintval(trunc_u);
+ T trunc_value = bits.get_val();
+
if (!half_bit_set) {
// Fractional part is less than 0.5 so round value is the
// same as the trunc value.
@@ -188,16 +192,17 @@ round_using_specific_rounding_mode(T x, int rnd) {
}
uint32_t trim_size = FPBits<T>::FRACTION_LEN - exponent;
- FPBits<T> new_bits = bits;
- StorageType trunc_mantissa =
- static_cast<StorageType>((bits.get_mantissa() >> trim_size) << trim_size);
- new_bits.set_mantissa(trunc_mantissa);
- T trunc_value = new_bits.get_val();
+ StorageType x_u = bits.uintval();
+ StorageType trunc_u =
+ static_cast<StorageType>((x_u >> trim_size) << trim_size);
// If x is already an integer, return it.
- if (trunc_value == x)
+ if (trunc_u == x_u)
return x;
+ FPBits<T> new_bits(trunc_u);
+ T trunc_value = new_bits.get_val();
+
StorageType trim_value =
bits.get_mantissa() &
static_cast<StorageType>(((StorageType(1) << trim_size) - 1));
diff --git a/libc/test/src/math/performance_testing/CMakeLists.txt b/libc/test/src/math/performance_testing/CMakeLists.txt
index 4ea78f9999e4d..bf88fbb85c5d7 100644
--- a/libc/test/src/math/performance_testing/CMakeLists.txt
+++ b/libc/test/src/math/performance_testing/CMakeLists.txt
@@ -366,3 +366,22 @@ add_perf_binary(
COMPILE_OPTIONS
-fno-builtin
)
+
+# Performance binary built from nearest_integer_funcs_perf.cpp. It depends on
+# the float and float16 variants of ceil, floor, roundeven, round and trunc.
+# NOTE(review): -fno-builtin presumably keeps the compiler from substituting
+# its own builtins for the benchmarked calls -- confirm.
+add_perf_binary(
+ nearest_integer_funcs_perf
+ SRCS
+ nearest_integer_funcs_perf.cpp
+ DEPENDS
+ libc.src.math.ceilf
+ libc.src.math.ceilf16
+ libc.src.math.floorf
+ libc.src.math.floorf16
+ libc.src.math.roundevenf
+ libc.src.math.roundevenf16
+ libc.src.math.roundf
+ libc.src.math.roundf16
+ libc.src.math.truncf
+ libc.src.math.truncf16
+ COMPILE_OPTIONS
+ -fno-builtin
+)
diff --git a/libc/test/src/math/performance_testing/nearest_integer_funcs_perf.cpp b/libc/test/src/math/performance_testing/nearest_integer_funcs_perf.cpp
new file mode 100644
index 0000000000000..24176a377e9d4
--- /dev/null
+++ b/libc/test/src/math/performance_testing/nearest_integer_funcs_perf.cpp
@@ -0,0 +1,168 @@
+//===-- Performance test for nearest integer functions --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/math/ceilf.h"
+#include "src/math/ceilf16.h"
+#include "src/math/floorf.h"
+#include "src/math/floorf16.h"
+#include "src/math/roundevenf.h"
+#include "src/math/roundevenf16.h"
+#include "src/math/roundf.h"
+#include "src/math/roundf16.h"
+#include "src/math/truncf.h"
+#include "src/math/truncf16.h"
+#include "test/src/math/performance_testing/Timer.h"
+
+#include <fstream>
+#include <math.h>
+
+namespace LIBC_NAMESPACE::testing {
+
+// Benchmark harness that times two `T(T)` functions over the same sequence of
+// inputs and logs their runtimes side by side. Inputs are generated by
+// stepping through StorageType bit patterns and converting each to T via
+// FPBits.
+template <typename T> class NearestIntegerPerf {
+  using FPBits = fputil::FPBits<T>;
+  using StorageType = typename FPBits::StorageType;
+
+public:
+  // Signature shared by the function under test and the reference function.
+  using Func = T(T);
+
+  // Times `my_func`, then `other_func`, over the bit patterns
+  // starting_bit, starting_bit + step, ... up to and including ending_bit,
+  // repeating the whole sweep `rounds` times for each function. Writes the
+  // total time, the average time per call and the calls per second for both
+  // functions to `log`, followed by the ratio of the two averages.
+  static void run_perf_in_range(Func my_func, Func other_func,
+                                StorageType starting_bit,
+                                StorageType ending_bit, StorageType step,
+                                size_t rounds, std::ofstream &log) {
+    auto runner = [=](Func func) {
+      // Each result is stored to a volatile so the calls stay observable and
+      // are not discarded as dead code.
+      volatile T result;
+      for (size_t i = 0; i < rounds; i++) {
+        for (StorageType bits = starting_bit; bits <= ending_bit;
+             bits += step) {
+          T x = FPBits(bits).get_val();
+          result = func(x);
+        }
+      }
+    };
+
+    // Number of calls made during a single sweep of the range.
+    const size_t number_of_runs = (ending_bit - starting_bit) / step + 1;
+
+    Timer timer;
+    // Times one complete run of `func`, logs its statistics under `header`,
+    // and returns the average time per call in nanoseconds.
+    auto measure = [&](Func func, const char *header) {
+      timer.start();
+      runner(func);
+      timer.stop();
+
+      const auto total_ns = timer.nanoseconds();
+      const double average =
+          static_cast<double>(total_ns) / number_of_runs / rounds;
+      log << header;
+      log << " Total time : " << total_ns << " ns \n";
+      log << " Average runtime : " << average << " ns/op \n";
+      log << " Ops per second : "
+          << static_cast<uint64_t>(1'000'000'000.0 / average) << " op/s \n";
+      return average;
+    };
+
+    const double my_average = measure(my_func, "-- My function --\n");
+    const double other_average = measure(other_func, "-- Other function --\n");
+
+    log << "-- Average runtime ratio --\n";
+    log << " Mine / Other's : " << my_average / other_average << " \n";
+  }
+
+  // Opens `log_file` for writing and benchmarks `my_func` against
+  // `other_func` over five input ranges, each introduced by a heading line.
+  //
+  // The first three ranges step by 1 << SIG_LEN, so each sweep visits only a
+  // few inputs; their round count is scaled up by 2 * EXP_BIAS * EXP_BIAS,
+  // presumably to keep the measured time meaningful. The last two ranges step
+  // through every bit pattern between their bounds.
+  static void run_perf(Func my_func, Func other_func, size_t rounds,
+                       const char *log_file) {
+    std::ofstream log(log_file);
+    log << "Performance tests with inputs in normal integral range:\n";
+    run_perf_in_range(
+        my_func, other_func,
+        /*starting_bit=*/StorageType((FPBits::EXP_BIAS + 1) << FPBits::SIG_LEN),
+        /*ending_bit=*/
+        StorageType((FPBits::EXP_BIAS + FPBits::FRACTION_LEN - 1)
+                    << FPBits::SIG_LEN),
+        /*step=*/StorageType(1 << FPBits::SIG_LEN),
+        rounds * FPBits::EXP_BIAS * FPBits::EXP_BIAS * 2, log);
+    log << "\n Performance tests with inputs in low integral range:\n";
+    run_perf_in_range(
+        my_func, other_func,
+        /*starting_bit=*/StorageType(1 << FPBits::SIG_LEN),
+        /*ending_bit=*/StorageType((FPBits::EXP_BIAS - 1) << FPBits::SIG_LEN),
+        /*step=*/StorageType(1 << FPBits::SIG_LEN),
+        rounds * FPBits::EXP_BIAS * FPBits::EXP_BIAS * 2, log);
+    log << "\n Performance tests with inputs in high integral range:\n";
+    run_perf_in_range(
+        my_func, other_func,
+        /*starting_bit=*/
+        StorageType((FPBits::EXP_BIAS + FPBits::FRACTION_LEN)
+                    << FPBits::SIG_LEN),
+        /*ending_bit=*/
+        StorageType(FPBits::MAX_BIASED_EXPONENT << FPBits::SIG_LEN),
+        /*step=*/StorageType(1 << FPBits::SIG_LEN),
+        rounds * FPBits::EXP_BIAS * FPBits::EXP_BIAS * 2, log);
+    log << "\n Performance tests with inputs in normal fractional range:\n";
+    run_perf_in_range(
+        my_func, other_func,
+        /*starting_bit=*/
+        StorageType(((FPBits::EXP_BIAS + 1) << FPBits::SIG_LEN) + 1),
+        /*ending_bit=*/
+        StorageType(((FPBits::EXP_BIAS + 2) << FPBits::SIG_LEN) - 1),
+        /*step=*/StorageType(1), rounds * 2, log);
+    log << "\n Performance tests with inputs in subnormal fractional range:\n";
+    run_perf_in_range(my_func, other_func, /*starting_bit=*/StorageType(1),
+                      /*ending_bit=*/StorageType(FPBits::SIG_MASK),
+                      /*step=*/StorageType(1), rounds, log);
+  }
+};
+
+} // namespace LIBC_NAMESPACE::testing
+
+// Expands to a braced statement that benchmarks `my_func` against `other_func`
+// for floating-point type `T` via NearestIntegerPerf<T>::run_perf, logging to
+// the file named by `filename`.
+// NOTE(review): run_perf is invoked twice with identical arguments, and each
+// invocation opens `filename` itself, so only the second pass's report should
+// remain in the file. Presumably the first pass acts as a warm-up -- confirm.
+#define NEAREST_INTEGER_PERF(T, my_func, other_func, rounds, filename) \
+ { \
+ LIBC_NAMESPACE::testing::NearestIntegerPerf<T>::run_perf( \
+ &my_func, &other_func, rounds, filename); \
+ LIBC_NAMESPACE::testing::NearestIntegerPerf<T>::run_perf( \
+ &my_func, &other_func, rounds, filename); \
+ }
+
+// Base round counts passed as `rounds` to NEAREST_INTEGER_PERF in main() for
+// the float16 and the float benchmarks respectively.
+static constexpr size_t FLOAT16_ROUNDS = 20'000;
+static constexpr size_t FLOAT_ROUNDS = 40;
+
+// LLVM libc might be the only libc implementation with support for float16 math
+// functions currently. We can't compare our float16 functions against the
+// system libc, so we compare them against this placeholder function.
+// It returns its argument unchanged, so the "Other function" figures it
+// produces reflect the benchmark loop plus a trivial call rather than a real
+// rounding routine.
+float16 placeholderf16(float16 x) { return x; }
+
+// The system libc might not provide the roundeven* C23 math functions either.
+// Identity stand-in used as the reference for roundevenf in main().
+float placeholderf(float x) { return x; }
+
+// Runs every benchmark in sequence. Each NEAREST_INTEGER_PERF invocation
+// writes its report to the log file named in its last argument.
+int main() {
+ // float16 variants: the reference is always the identity placeholder.
+ NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::ceilf16, ::placeholderf16,
+ FLOAT16_ROUNDS, "ceilf16_perf.log")
+ NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::floorf16, ::placeholderf16,
+ FLOAT16_ROUNDS, "floorf16_perf.log")
+ NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::roundevenf16, ::placeholderf16,
+ FLOAT16_ROUNDS, "roundevenf16_perf.log")
+ NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::roundf16, ::placeholderf16,
+ FLOAT16_ROUNDS, "roundf16_perf.log")
+ NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::truncf16, ::placeholderf16,
+ FLOAT16_ROUNDS, "truncf16_perf.log")
+
+ // float variants: the reference is the global-namespace function of the same
+ // name (presumably the system libc's, via <math.h>), except for roundevenf,
+ // which is compared against the identity placeholder.
+ NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::ceilf, ::ceilf, FLOAT_ROUNDS,
+ "ceilf_perf.log")
+ NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::floorf, ::floorf, FLOAT_ROUNDS,
+ "floorf_perf.log")
+ NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::roundevenf, ::placeholderf,
+ FLOAT_ROUNDS, "roundevenf_perf.log")
+ NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::roundf, ::roundf, FLOAT_ROUNDS,
+ "roundf_perf.log")
+ NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::truncf, ::truncf, FLOAT_ROUNDS,
+ "truncf_perf.log")
+
+ return 0;
+}
More information about the libc-commits
mailing list