[libc-commits] [libc] [libc][math] Optimize misc basic math operations with builtins when available (PR #99037)
via libc-commits
libc-commits at lists.llvm.org
Tue Jul 16 06:23:50 PDT 2024
https://github.com/overmighty created https://github.com/llvm/llvm-project/pull/99037
None
>From 9554a0b45940abfd27066d8f79f70f7001b19d91 Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Tue, 16 Jul 2024 13:52:05 +0200
Subject: [PATCH 1/2] [libc][math] Optimize misc basic ops using builtins when
available
---
.../cmake/modules/CheckCompilerFeatures.cmake | 6 +
.../modules/LLVMLibCCompileOptionRules.cmake | 10 ++
libc/cmake/modules/LLVMLibCFlagRules.cmake | 3 +
.../check_builtin_fmax_fmin.cpp | 14 ++
.../check_builtin_frexpf16.cpp | 5 +
libc/src/math/generic/CMakeLists.txt | 26 +++
libc/src/math/generic/copysign.cpp | 4 +
libc/src/math/generic/copysignf.cpp | 4 +
libc/src/math/generic/copysignf16.cpp | 4 +
libc/src/math/generic/fabs.cpp | 8 +-
libc/src/math/generic/fabsf.cpp | 8 +-
libc/src/math/generic/fabsf16.cpp | 8 +-
libc/src/math/generic/fmaximum_num.cpp | 6 +-
libc/src/math/generic/fmaximum_numf.cpp | 6 +-
libc/src/math/generic/fmaximum_numf16.cpp | 4 +
libc/src/math/generic/fminimum_num.cpp | 6 +-
libc/src/math/generic/fminimum_numf.cpp | 6 +-
libc/src/math/generic/fminimum_numf16.cpp | 4 +
libc/src/math/generic/frexpf16.cpp | 4 +
.../BinaryOpSingleOutputPerf.h | 7 +-
.../math/performance_testing/CMakeLists.txt | 20 +++
.../SingleInputSingleOutputPerf.h | 44 +++--
.../misc_basic_ops_perf.cpp | 153 ++++++++++++++++++
23 files changed, 335 insertions(+), 25 deletions(-)
create mode 100644 libc/cmake/modules/compiler_features/check_builtin_fmax_fmin.cpp
create mode 100644 libc/cmake/modules/compiler_features/check_builtin_frexpf16.cpp
create mode 100644 libc/test/src/math/performance_testing/misc_basic_ops_perf.cpp
diff --git a/libc/cmake/modules/CheckCompilerFeatures.cmake b/libc/cmake/modules/CheckCompilerFeatures.cmake
index a6d793d495c45..a10b88fc2acaa 100644
--- a/libc/cmake/modules/CheckCompilerFeatures.cmake
+++ b/libc/cmake/modules/CheckCompilerFeatures.cmake
@@ -5,6 +5,8 @@
set(
ALL_COMPILER_FEATURES
"builtin_ceil_floor_rint_trunc"
+ "builtin_fmax_fmin"
+ "builtin_frexpf16"
"builtin_round"
"builtin_roundeven"
"float16"
@@ -82,6 +84,10 @@ foreach(feature IN LISTS ALL_COMPILER_FEATURES)
set(LIBC_COMPILER_HAS_FIXED_POINT TRUE)
elseif(${feature} STREQUAL "builtin_ceil_floor_rint_trunc")
set(LIBC_COMPILER_HAS_BUILTIN_CEIL_FLOOR_RINT_TRUNC TRUE)
+ elseif(${feature} STREQUAL "builtin_fmax_fmin")
+ set(LIBC_COMPILER_HAS_BUILTIN_FMAX_FMIN TRUE)
+ elseif(${feature} STREQUAL "builtin_frexpf16")
+ set(LIBC_COMPILER_HAS_BUILTIN_FREXPF16 TRUE)
elseif(${feature} STREQUAL "builtin_round")
set(LIBC_COMPILER_HAS_BUILTIN_ROUND TRUE)
elseif(${feature} STREQUAL "builtin_roundeven")
diff --git a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
index 253da4ae890e5..047468481c2f6 100644
--- a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
+++ b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
@@ -6,6 +6,7 @@ function(_get_compile_options_from_flags output_var)
endif()
check_flag(ADD_ROUND_OPT_FLAG ${ROUND_OPT_FLAG} ${ARGN})
check_flag(ADD_EXPLICIT_SIMD_OPT_FLAG ${EXPLICIT_SIMD_OPT_FLAG} ${ARGN})
+ check_flag(ADD_MISC_MATH_BASIC_OPS_OPT_FLAG ${MISC_MATH_BASIC_OPS_OPT_FLAG} ${ARGN})
if(LLVM_COMPILER_IS_GCC_COMPATIBLE)
if(ADD_FMA_FLAG)
@@ -37,6 +38,15 @@ function(_get_compile_options_from_flags output_var)
if(ADD_EXPLICIT_SIMD_OPT_FLAG)
list(APPEND compile_options "-D__LIBC_EXPLICIT_SIMD_OPT")
endif()
+ if(ADD_MISC_MATH_BASIC_OPS_OPT_FLAG)
+ list(APPEND compile_options "-D__LIBC_MISC_MATH_BASIC_OPS_OPT")
+ if(LIBC_COMPILER_HAS_BUILTIN_FMAX_FMIN)
+ list(APPEND compile_options "-D__LIBC_USE_BUILTIN_FMAX_FMIN")
+ endif()
+ if(LIBC_COMPILER_HAS_BUILTIN_FREXPF16)
+ list(APPEND compile_options "-D__LIBC_USE_BUILTIN_FREXPF16")
+ endif()
+ endif()
elseif(MSVC)
if(ADD_FMA_FLAG)
list(APPEND compile_options "/arch:AVX2")
diff --git a/libc/cmake/modules/LLVMLibCFlagRules.cmake b/libc/cmake/modules/LLVMLibCFlagRules.cmake
index eca7ba8d183e6..4398fe55db5aa 100644
--- a/libc/cmake/modules/LLVMLibCFlagRules.cmake
+++ b/libc/cmake/modules/LLVMLibCFlagRules.cmake
@@ -263,6 +263,9 @@ set(FMA_OPT_FLAG "FMA_OPT")
set(ROUND_OPT_FLAG "ROUND_OPT")
# This flag controls whether we use explicit SIMD instructions or not.
set(EXPLICIT_SIMD_OPT_FLAG "EXPLICIT_SIMD_OPT")
+# This flag controls whether we use compiler builtin functions to implement
+# various basic math operations or not.
+set(MISC_MATH_BASIC_OPS_OPT_FLAG "MISC_MATH_BASIC_OPS_OPT")
# Skip FMA_OPT flag for targets that don't support fma.
if(NOT((LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "FMA")) OR
diff --git a/libc/cmake/modules/compiler_features/check_builtin_fmax_fmin.cpp b/libc/cmake/modules/compiler_features/check_builtin_fmax_fmin.cpp
new file mode 100644
index 0000000000000..a962df33e31c4
--- /dev/null
+++ b/libc/cmake/modules/compiler_features/check_builtin_fmax_fmin.cpp
@@ -0,0 +1,14 @@
+_Float16 try_builtin_fmaxf16(_Float16 x, _Float16 y) {
+ return __builtin_fmaxf16(x, y);
+}
+_Float16 try_builtin_fminf16(_Float16 x, _Float16 y) {
+ return __builtin_fminf16(x, y);
+}
+
+float try_builtin_fmaxf(float x, float y) { return __builtin_fmaxf(x, y); }
+float try_builtin_fminf(float x, float y) { return __builtin_fminf(x, y); }
+
+double try_builtin_fmax(double x, double y) { return __builtin_fmax(x, y); }
+double try_builtin_fmin(double x, double y) { return __builtin_fmin(x, y); }
+
+extern "C" void _start() {}
diff --git a/libc/cmake/modules/compiler_features/check_builtin_frexpf16.cpp b/libc/cmake/modules/compiler_features/check_builtin_frexpf16.cpp
new file mode 100644
index 0000000000000..e65ef54c1a298
--- /dev/null
+++ b/libc/cmake/modules/compiler_features/check_builtin_frexpf16.cpp
@@ -0,0 +1,5 @@
+_Float16 try_builtin_frexpf16(_Float16 x, int *exp) {
+ return __builtin_frexpf16(x, exp);
+}
+
+extern "C" void _start() {}
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index c2f58fb1a4f71..777d649d3d4d7 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -400,6 +400,8 @@ add_entrypoint_object(
libc.src.__support.FPUtil.basic_operations
COMPILE_OPTIONS
-O2
+ FLAGS
+ MISC_MATH_BASIC_OPS_OPT
)
add_entrypoint_object(
@@ -412,6 +414,8 @@ add_entrypoint_object(
libc.src.__support.FPUtil.basic_operations
COMPILE_OPTIONS
-O2
+ FLAGS
+ MISC_MATH_BASIC_OPS_OPT
)
add_entrypoint_object(
@@ -437,6 +441,8 @@ add_entrypoint_object(
libc.src.__support.FPUtil.basic_operations
COMPILE_OPTIONS
-O3
+ FLAGS
+ MISC_MATH_BASIC_OPS_OPT
)
add_entrypoint_object(
@@ -1406,6 +1412,8 @@ add_entrypoint_object(
libc.src.__support.FPUtil.manipulation_functions
COMPILE_OPTIONS
-O3
+ FLAGS
+ MISC_MATH_BASIC_OPS_OPT
)
add_entrypoint_object(
@@ -1418,6 +1426,8 @@ add_entrypoint_object(
libc.src.__support.FPUtil.manipulation_functions
COMPILE_OPTIONS
-O3
+ FLAGS
+ MISC_MATH_BASIC_OPS_OPT
)
add_entrypoint_object(
@@ -1443,6 +1453,8 @@ add_entrypoint_object(
libc.src.__support.FPUtil.manipulation_functions
COMPILE_OPTIONS
-O3
+ FLAGS
+ MISC_MATH_BASIC_OPS_OPT
)
add_entrypoint_object(
@@ -1505,6 +1517,8 @@ add_entrypoint_object(
DEPENDS
libc.src.__support.macros.properties.types
libc.src.__support.FPUtil.manipulation_functions
+ FLAGS
+ MISC_BASIC_OPS_OPT
)
add_entrypoint_object(
@@ -2202,6 +2216,8 @@ add_entrypoint_object(
libc.src.__support.FPUtil.basic_operations
COMPILE_OPTIONS
-O2
+ FLAGS
+ MISC_MATH_BASIC_OPS_OPT
)
add_entrypoint_object(
@@ -2214,6 +2230,8 @@ add_entrypoint_object(
libc.src.__support.FPUtil.basic_operations
COMPILE_OPTIONS
-O2
+ FLAGS
+ MISC_MATH_BASIC_OPS_OPT
)
add_entrypoint_object(
@@ -2239,6 +2257,8 @@ add_entrypoint_object(
libc.src.__support.FPUtil.basic_operations
COMPILE_OPTIONS
-O3
+ FLAGS
+ MISC_MATH_BASIC_OPS_OPT
)
add_entrypoint_object(
@@ -2450,6 +2470,8 @@ add_entrypoint_object(
libc.src.__support.FPUtil.basic_operations
COMPILE_OPTIONS
-O2
+ FLAGS
+ MISC_MATH_BASIC_OPS_OPT
)
add_entrypoint_object(
@@ -2462,6 +2484,8 @@ add_entrypoint_object(
libc.src.__support.FPUtil.basic_operations
COMPILE_OPTIONS
-O2
+ FLAGS
+ MISC_MATH_BASIC_OPS_OPT
)
add_entrypoint_object(
@@ -2487,6 +2511,8 @@ add_entrypoint_object(
libc.src.__support.FPUtil.basic_operations
COMPILE_OPTIONS
-O3
+ FLAGS
+ MISC_MATH_BASIC_OPS_OPT
)
add_entrypoint_object(
diff --git a/libc/src/math/generic/copysign.cpp b/libc/src/math/generic/copysign.cpp
index 149d725af08e2..186bb2c5983f4 100644
--- a/libc/src/math/generic/copysign.cpp
+++ b/libc/src/math/generic/copysign.cpp
@@ -14,7 +14,11 @@
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(double, copysign, (double x, double y)) {
+#ifdef __LIBC_MISC_MATH_BASIC_OPS_OPT
+ return __builtin_copysign(x, y);
+#else
return fputil::copysign(x, y);
+#endif
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/copysignf.cpp b/libc/src/math/generic/copysignf.cpp
index 17cd70d37c308..c79e50b61ebda 100644
--- a/libc/src/math/generic/copysignf.cpp
+++ b/libc/src/math/generic/copysignf.cpp
@@ -14,7 +14,11 @@
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(float, copysignf, (float x, float y)) {
+#ifdef __LIBC_MISC_MATH_BASIC_OPS_OPT
+ return __builtin_copysignf(x, y);
+#else
return fputil::copysign(x, y);
+#endif
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/copysignf16.cpp b/libc/src/math/generic/copysignf16.cpp
index 42695b3b4a6de..546622f049ebe 100644
--- a/libc/src/math/generic/copysignf16.cpp
+++ b/libc/src/math/generic/copysignf16.cpp
@@ -14,7 +14,11 @@
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(float16, copysignf16, (float16 x, float16 y)) {
+#ifdef __LIBC_MISC_MATH_BASIC_OPS_OPT
+ return __builtin_copysignf16(x, y);
+#else
return fputil::copysign(x, y);
+#endif
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fabs.cpp b/libc/src/math/generic/fabs.cpp
index 472297aecb2f7..55fa958cd7c00 100644
--- a/libc/src/math/generic/fabs.cpp
+++ b/libc/src/math/generic/fabs.cpp
@@ -13,6 +13,12 @@
namespace LIBC_NAMESPACE_DECL {
-LLVM_LIBC_FUNCTION(double, fabs, (double x)) { return fputil::abs(x); }
+LLVM_LIBC_FUNCTION(double, fabs, (double x)) {
+#ifdef __LIBC_MISC_MATH_BASIC_OPS_OPT
+ return __builtin_fabs(x);
+#else
+ return fputil::abs(x);
+#endif
+}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fabsf.cpp b/libc/src/math/generic/fabsf.cpp
index ad4fcb30c795d..2ba18d09bbd5b 100644
--- a/libc/src/math/generic/fabsf.cpp
+++ b/libc/src/math/generic/fabsf.cpp
@@ -13,6 +13,12 @@
namespace LIBC_NAMESPACE_DECL {
-LLVM_LIBC_FUNCTION(float, fabsf, (float x)) { return fputil::abs(x); }
+LLVM_LIBC_FUNCTION(float, fabsf, (float x)) {
+#ifdef __LIBC_MISC_MATH_BASIC_OPS_OPT
+ return __builtin_fabsf(x);
+#else
+ return fputil::abs(x);
+#endif
+}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fabsf16.cpp b/libc/src/math/generic/fabsf16.cpp
index 57671fb6067e2..2f982517614c4 100644
--- a/libc/src/math/generic/fabsf16.cpp
+++ b/libc/src/math/generic/fabsf16.cpp
@@ -13,6 +13,12 @@
namespace LIBC_NAMESPACE_DECL {
-LLVM_LIBC_FUNCTION(float16, fabsf16, (float16 x)) { return fputil::abs(x); }
+LLVM_LIBC_FUNCTION(float16, fabsf16, (float16 x)) {
+#ifdef __LIBC_MISC_MATH_BASIC_OPS_OPT
+ return __builtin_fabsf16(x);
+#else
+ return fputil::abs(x);
+#endif
+}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fmaximum_num.cpp b/libc/src/math/generic/fmaximum_num.cpp
index 33df7daa380df..1bfc1514393ee 100644
--- a/libc/src/math/generic/fmaximum_num.cpp
+++ b/libc/src/math/generic/fmaximum_num.cpp
@@ -1,4 +1,4 @@
-//===-- Implementation of fmaximum_num function----------------------------===//
+//===-- Implementation of fmaximum_num function ---------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -14,7 +14,11 @@
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(double, fmaximum_num, (double x, double y)) {
+#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
+ return __builtin_fmax(x, y);
+#else
return fputil::fmaximum_num(x, y);
+#endif
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fmaximum_numf.cpp b/libc/src/math/generic/fmaximum_numf.cpp
index 1577080ba2c25..f8c69fa78be3d 100644
--- a/libc/src/math/generic/fmaximum_numf.cpp
+++ b/libc/src/math/generic/fmaximum_numf.cpp
@@ -1,4 +1,4 @@
-//===-- Implementation of fmaximum_numf function---------------------------===//
+//===-- Implementation of fmaximum_numf function --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -14,7 +14,11 @@
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(float, fmaximum_numf, (float x, float y)) {
+#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
+ return __builtin_fmaxf(x, y);
+#else
return fputil::fmaximum_num(x, y);
+#endif
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fmaximum_numf16.cpp b/libc/src/math/generic/fmaximum_numf16.cpp
index 394ce8b5fe4f3..6a012d38abea4 100644
--- a/libc/src/math/generic/fmaximum_numf16.cpp
+++ b/libc/src/math/generic/fmaximum_numf16.cpp
@@ -14,7 +14,11 @@
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(float16, fmaximum_numf16, (float16 x, float16 y)) {
+#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
+ return __builtin_fmaxf16(x, y);
+#else
return fputil::fmaximum_num(x, y);
+#endif
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fminimum_num.cpp b/libc/src/math/generic/fminimum_num.cpp
index 3ff79def58075..5b9c426ca50c2 100644
--- a/libc/src/math/generic/fminimum_num.cpp
+++ b/libc/src/math/generic/fminimum_num.cpp
@@ -1,4 +1,4 @@
-//===-- Implementation of fminimum_num function----------------------------===//
+//===-- Implementation of fminimum_num function ---------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -14,7 +14,11 @@
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(double, fminimum_num, (double x, double y)) {
+#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
+ return __builtin_fmin(x, y);
+#else
return fputil::fminimum_num(x, y);
+#endif
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fminimum_numf.cpp b/libc/src/math/generic/fminimum_numf.cpp
index c7ac99b14bd5a..6b6f905e63de3 100644
--- a/libc/src/math/generic/fminimum_numf.cpp
+++ b/libc/src/math/generic/fminimum_numf.cpp
@@ -1,4 +1,4 @@
-//===-- Implementation of fminimum_numf function---------------------------===//
+//===-- Implementation of fminimum_numf function --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -14,7 +14,11 @@
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(float, fminimum_numf, (float x, float y)) {
+#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
+ return __builtin_fminf(x, y);
+#else
return fputil::fminimum_num(x, y);
+#endif
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fminimum_numf16.cpp b/libc/src/math/generic/fminimum_numf16.cpp
index 0af7205713c10..8e48aaf27070f 100644
--- a/libc/src/math/generic/fminimum_numf16.cpp
+++ b/libc/src/math/generic/fminimum_numf16.cpp
@@ -14,7 +14,11 @@
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(float16, fminimum_numf16, (float16 x, float16 y)) {
+#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
+ return __builtin_fminf16(x, y);
+#else
return fputil::fminimum_num(x, y);
+#endif
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/frexpf16.cpp b/libc/src/math/generic/frexpf16.cpp
index 4571b0d0ea389..342fdd39046f8 100644
--- a/libc/src/math/generic/frexpf16.cpp
+++ b/libc/src/math/generic/frexpf16.cpp
@@ -14,7 +14,11 @@
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(float16, frexpf16, (float16 x, int *exp)) {
+#ifdef __LIBC_USE_BUILTIN_FREXPF16
+ return __builtin_frexpf16(x, exp);
+#else
return fputil::frexp(x, *exp);
+#endif
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
index 995e41ba84b03..1ab0afbc9cbe8 100644
--- a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
+++ b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
@@ -10,6 +10,7 @@
#include "src/__support/macros/config.h"
#include "test/src/math/performance_testing/Timer.h"
+#include <algorithm>
#include <cstddef>
#include <fstream>
@@ -28,11 +29,11 @@ template <typename T> class BinaryOpSingleOutputPerf {
static void run_perf_in_range(Func myFunc, Func otherFunc,
StorageType startingBit, StorageType endingBit,
size_t N, size_t rounds, std::ofstream &log) {
- if (endingBit - startingBit < N)
- N = endingBit - startingBit;
+ if (sizeof(StorageType) <= sizeof(size_t))
+ N = std::min(N, static_cast<size_t>(endingBit - startingBit));
auto runner = [=](Func func) {
- volatile T result;
+ [[maybe_unused]] volatile T result;
if (endingBit < startingBit) {
return;
}
diff --git a/libc/test/src/math/performance_testing/CMakeLists.txt b/libc/test/src/math/performance_testing/CMakeLists.txt
index a75becba04d07..a4059c8ff4dd8 100644
--- a/libc/test/src/math/performance_testing/CMakeLists.txt
+++ b/libc/test/src/math/performance_testing/CMakeLists.txt
@@ -402,3 +402,23 @@ add_perf_binary(
LINK_LIBRARIES
LibcFPTestHelpers
)
+
+add_perf_binary(
+ misc_basic_ops_perf
+ SRCS
+ misc_basic_ops_perf.cpp
+ DEPENDS
+ .binary_op_single_output_diff
+ .single_input_single_output_diff
+ libc.src.math.copysignf
+ libc.src.math.copysignf16
+ libc.src.math.fabsf
+ libc.src.math.fabsf16
+ libc.src.math.fmaximum_numf
+ libc.src.math.fmaximum_numf16
+ libc.src.math.fminimum_numf
+ libc.src.math.fminimum_numf16
+ libc.src.math.frexpf16
+ COMPILE_OPTIONS
+ -fno-builtin
+)
diff --git a/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h b/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h
index 48ae43d6315e3..e0beb729cb9f5 100644
--- a/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h
+++ b/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h
@@ -10,6 +10,7 @@
#include "src/__support/macros/config.h"
#include "test/src/math/performance_testing/Timer.h"
+#include <algorithm>
#include <fstream>
namespace LIBC_NAMESPACE_DECL {
@@ -26,16 +27,21 @@ template <typename T> class SingleInputSingleOutputPerf {
static void runPerfInRange(Func myFunc, Func otherFunc,
StorageType startingBit, StorageType endingBit,
- std::ofstream &log) {
+ size_t rounds, std::ofstream &log) {
+ size_t n = 10'010'001;
+ if (sizeof(StorageType) <= sizeof(size_t))
+ n = std::min(n, static_cast<size_t>(endingBit - startingBit));
+
auto runner = [=](Func func) {
- constexpr StorageType N = 10'010'001;
- StorageType step = (endingBit - startingBit) / N;
+ StorageType step = (endingBit - startingBit) / n;
if (step == 0)
step = 1;
- volatile T result;
- for (StorageType bits = startingBit; bits < endingBit; bits += step) {
- T x = FPBits(bits).get_val();
- result = func(x);
+ [[maybe_unused]] volatile T result;
+ for (size_t i = 0; i < rounds; i++) {
+ for (StorageType bits = startingBit; bits < endingBit; bits += step) {
+ T x = FPBits(bits).get_val();
+ result = func(x);
+ }
}
};
@@ -44,8 +50,7 @@ template <typename T> class SingleInputSingleOutputPerf {
runner(myFunc);
timer.stop();
- StorageType numberOfRuns = endingBit - startingBit + 1;
- double myAverage = static_cast<double>(timer.nanoseconds()) / numberOfRuns;
+ double myAverage = static_cast<double>(timer.nanoseconds()) / n / rounds;
log << "-- My function --\n";
log << " Total time : " << timer.nanoseconds() << " ns \n";
log << " Average runtime : " << myAverage << " ns/op \n";
@@ -56,8 +61,7 @@ template <typename T> class SingleInputSingleOutputPerf {
runner(otherFunc);
timer.stop();
- double otherAverage =
- static_cast<double>(timer.nanoseconds()) / numberOfRuns;
+ double otherAverage = static_cast<double>(timer.nanoseconds()) / n / rounds;
log << "-- Other function --\n";
log << " Total time : " << timer.nanoseconds() << " ns \n";
log << " Average runtime : " << otherAverage << " ns/op \n";
@@ -68,15 +72,18 @@ template <typename T> class SingleInputSingleOutputPerf {
log << " Mine / Other's : " << myAverage / otherAverage << " \n";
}
- static void runPerf(Func myFunc, Func otherFunc, const char *logFile) {
+ static void runPerf(Func myFunc, Func otherFunc, size_t rounds,
+ const char *logFile) {
std::ofstream log(logFile);
log << " Performance tests with inputs in denormal range:\n";
runPerfInRange(myFunc, otherFunc, /* startingBit= */ StorageType(0),
- /* endingBit= */ FPBits::max_subnormal().uintval(), log);
+ /* endingBit= */ FPBits::max_subnormal().uintval(), rounds,
+ log);
log << "\n Performance tests with inputs in normal range:\n";
runPerfInRange(myFunc, otherFunc,
/* startingBit= */ FPBits::min_normal().uintval(),
- /* endingBit= */ FPBits::max_normal().uintval(), log);
+ /* endingBit= */ FPBits::max_normal().uintval(), rounds,
+ log);
}
};
@@ -86,6 +93,13 @@ template <typename T> class SingleInputSingleOutputPerf {
#define SINGLE_INPUT_SINGLE_OUTPUT_PERF(T, myFunc, otherFunc, filename) \
int main() { \
LIBC_NAMESPACE::testing::SingleInputSingleOutputPerf<T>::runPerf( \
- &myFunc, &otherFunc, filename); \
+ &myFunc, &otherFunc, 1, filename); \
return 0; \
}
+
+#define SINGLE_INPUT_SINGLE_OUTPUT_PERF_EX(T, myFunc, otherFunc, rounds, \
+ filename) \
+ { \
+ LIBC_NAMESPACE::testing::SingleInputSingleOutputPerf<T>::runPerf( \
+ &myFunc, &otherFunc, rounds, filename); \
+ }
diff --git a/libc/test/src/math/performance_testing/misc_basic_ops_perf.cpp b/libc/test/src/math/performance_testing/misc_basic_ops_perf.cpp
new file mode 100644
index 0000000000000..6c3ee72a4c5d3
--- /dev/null
+++ b/libc/test/src/math/performance_testing/misc_basic_ops_perf.cpp
@@ -0,0 +1,153 @@
+//===-- Performance test for miscellaneous basic operations ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "BinaryOpSingleOutputPerf.h"
+#include "SingleInputSingleOutputPerf.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/math/copysignf.h"
+#include "src/math/copysignf16.h"
+#include "src/math/fabsf.h"
+#include "src/math/fabsf16.h"
+#include "src/math/fmaximum_numf.h"
+#include "src/math/fmaximum_numf16.h"
+#include "src/math/fminimum_numf.h"
+#include "src/math/fminimum_numf16.h"
+#include "src/math/frexpf16.h"
+#include "test/src/math/performance_testing/Timer.h"
+
+#include <algorithm>
+#include <fstream>
+#include <math.h>
+
+namespace LIBC_NAMESPACE::testing {
+
+template <typename T> class FrexpPerf {
+ using FPBits = fputil::FPBits<T>;
+ using StorageType = typename FPBits::StorageType;
+
+public:
+ typedef T Func(T, int *);
+
+ static void run_perf_in_range(Func my_func, Func other_func,
+ StorageType starting_bit,
+ StorageType ending_bit, size_t rounds,
+ std::ofstream &log) {
+ size_t n = 10'010'001;
+ if (sizeof(StorageType) <= sizeof(size_t))
+ n = std::min(n, static_cast<size_t>(ending_bit - starting_bit));
+
+ auto runner = [=](Func func) {
+ StorageType step = (ending_bit - starting_bit) / n;
+ if (step == 0)
+ step = 1;
+ [[maybe_unused]] volatile T result;
+ int result_exp;
+ for (size_t i = 0; i < rounds; i++) {
+ for (StorageType bits = starting_bit; bits < ending_bit; bits += step) {
+ T x = FPBits(bits).get_val();
+ result = func(x, &result_exp);
+ }
+ }
+ };
+
+ Timer timer;
+ timer.start();
+ runner(my_func);
+ timer.stop();
+
+ double my_average = static_cast<double>(timer.nanoseconds()) / n / rounds;
+ log << "-- My function --\n";
+ log << " Total time : " << timer.nanoseconds() << " ns \n";
+ log << " Average runtime : " << my_average << " ns/op \n";
+ log << " Ops per second : "
+ << static_cast<uint64_t>(1'000'000'000.0 / my_average) << " op/s \n";
+
+ timer.start();
+ runner(other_func);
+ timer.stop();
+
+ double other_average =
+ static_cast<double>(timer.nanoseconds()) / n / rounds;
+ log << "-- Other function --\n";
+ log << " Total time : " << timer.nanoseconds() << " ns \n";
+ log << " Average runtime : " << other_average << " ns/op \n";
+ log << " Ops per second : "
+ << static_cast<uint64_t>(1'000'000'000.0 / other_average) << " op/s \n";
+
+ log << "-- Average runtime ratio --\n";
+ log << " Mine / Other's : " << my_average / other_average << " \n";
+ }
+
+ static void run_perf(Func my_func, Func other_func, size_t rounds,
+ const char *log_file) {
+ std::ofstream log(log_file);
+ log << " Performance tests with inputs in denormal range:\n";
+ run_perf_in_range(my_func, other_func, /* startingBit= */ StorageType(0),
+ /* endingBit= */ FPBits::max_subnormal().uintval(),
+ rounds, log);
+ log << "\n Performance tests with inputs in normal range:\n";
+ run_perf_in_range(my_func, other_func,
+ /* startingBit= */ FPBits::min_normal().uintval(),
+ /* endingBit= */ FPBits::max_normal().uintval(), rounds,
+ log);
+ }
+};
+
+} // namespace LIBC_NAMESPACE::testing
+
+#define FREXP_PERF(T, my_func, other_func, rounds, filename) \
+ { \
+ LIBC_NAMESPACE::testing::FrexpPerf<T>::run_perf(&my_func, &other_func, \
+ rounds, filename); \
+ LIBC_NAMESPACE::testing::FrexpPerf<T>::run_perf(&my_func, &other_func, \
+ rounds, filename); \
+ }
+
+static constexpr size_t FLOAT16_ROUNDS = 20'000;
+static constexpr size_t FLOAT_ROUNDS = 40;
+
+// LLVM libc might currently be the only libc implementation that supports
+// float16 math functions. We can't compare our float16 functions against the
+// system libc, so we compare them against these placeholder functions.
+float16 placeholder_unaryf16(float16 x) { return x; }
+float16 placeholder_binaryf16(float16 x, float16 y) { return x; }
+float16 placeholder_frexpf16(float16 x, int *exp) { return x; }
+
+// The system libc might not provide the f{max,min}imum_num* C23 math functions
+// either.
+float placeholder_binaryf(float x, float y) { return x; }
+
+int main() {
+ SINGLE_INPUT_SINGLE_OUTPUT_PERF_EX(float16, LIBC_NAMESPACE::fabsf16,
+ placeholder_unaryf16, FLOAT16_ROUNDS,
+ "fabsf16_perf.log")
+ BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, LIBC_NAMESPACE::copysignf16,
+ placeholder_binaryf16, FLOAT16_ROUNDS,
+ "copysignf16_perf.log")
+ BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, LIBC_NAMESPACE::fmaximum_numf16,
+ placeholder_binaryf16, FLOAT16_ROUNDS,
+ "fmaximum_numf16_perf.log")
+ BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, LIBC_NAMESPACE::fminimum_numf16,
+ placeholder_binaryf16, FLOAT16_ROUNDS,
+ "fminimum_numf16_perf.log")
+ FREXP_PERF(float16, LIBC_NAMESPACE::frexpf16, placeholder_frexpf16,
+ FLOAT16_ROUNDS, "frexpf16_perf.log")
+
+ SINGLE_INPUT_SINGLE_OUTPUT_PERF_EX(float, LIBC_NAMESPACE::fabsf, fabsf,
+ FLOAT_ROUNDS, "fabsf_perf.log")
+ BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, LIBC_NAMESPACE::copysignf, copysignf,
+ FLOAT_ROUNDS, "copysignf_perf.log")
+ BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, LIBC_NAMESPACE::fmaximum_numf,
+ placeholder_binaryf, FLOAT_ROUNDS,
+ "fmaximum_numf_perf.log")
+ BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, LIBC_NAMESPACE::fminimum_numf,
+ placeholder_binaryf, FLOAT_ROUNDS,
+ "fminimum_numf_perf.log")
+
+ return 0;
+}
>From c422169d45ddb0fa83072736017deed681882f71 Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Tue, 16 Jul 2024 14:33:32 +0200
Subject: [PATCH 2/2] fixup! [libc][math] Optimize misc basic ops using
builtins when available
Remove support for __builtin_frexpf16 as it decreases performance.
---
libc/cmake/modules/CheckCompilerFeatures.cmake | 3 ---
libc/cmake/modules/LLVMLibCCompileOptionRules.cmake | 3 ---
.../modules/compiler_features/check_builtin_frexpf16.cpp | 5 -----
libc/src/math/generic/CMakeLists.txt | 2 --
libc/src/math/generic/frexpf16.cpp | 4 ----
5 files changed, 17 deletions(-)
delete mode 100644 libc/cmake/modules/compiler_features/check_builtin_frexpf16.cpp
diff --git a/libc/cmake/modules/CheckCompilerFeatures.cmake b/libc/cmake/modules/CheckCompilerFeatures.cmake
index a10b88fc2acaa..2bab968f901eb 100644
--- a/libc/cmake/modules/CheckCompilerFeatures.cmake
+++ b/libc/cmake/modules/CheckCompilerFeatures.cmake
@@ -6,7 +6,6 @@ set(
ALL_COMPILER_FEATURES
"builtin_ceil_floor_rint_trunc"
"builtin_fmax_fmin"
- "builtin_frexpf16"
"builtin_round"
"builtin_roundeven"
"float16"
@@ -86,8 +85,6 @@ foreach(feature IN LISTS ALL_COMPILER_FEATURES)
set(LIBC_COMPILER_HAS_BUILTIN_CEIL_FLOOR_RINT_TRUNC TRUE)
elseif(${feature} STREQUAL "builtin_fmax_fmin")
set(LIBC_COMPILER_HAS_BUILTIN_FMAX_FMIN TRUE)
- elseif(${feature} STREQUAL "builtin_frexpf16")
- set(LIBC_COMPILER_HAS_BUILTIN_FREXPF16 TRUE)
elseif(${feature} STREQUAL "builtin_round")
set(LIBC_COMPILER_HAS_BUILTIN_ROUND TRUE)
elseif(${feature} STREQUAL "builtin_roundeven")
diff --git a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
index 047468481c2f6..ead578f95ac72 100644
--- a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
+++ b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
@@ -43,9 +43,6 @@ function(_get_compile_options_from_flags output_var)
if(LIBC_COMPILER_HAS_BUILTIN_FMAX_FMIN)
list(APPEND compile_options "-D__LIBC_USE_BUILTIN_FMAX_FMIN")
endif()
- if(LIBC_COMPILER_HAS_BUILTIN_FREXPF16)
- list(APPEND compile_options "-D__LIBC_USE_BUILTIN_FREXPF16")
- endif()
endif()
elseif(MSVC)
if(ADD_FMA_FLAG)
diff --git a/libc/cmake/modules/compiler_features/check_builtin_frexpf16.cpp b/libc/cmake/modules/compiler_features/check_builtin_frexpf16.cpp
deleted file mode 100644
index e65ef54c1a298..0000000000000
--- a/libc/cmake/modules/compiler_features/check_builtin_frexpf16.cpp
+++ /dev/null
@@ -1,5 +0,0 @@
-_Float16 try_builtin_frexpf16(_Float16 x, int *exp) {
- return __builtin_frexpf16(x, exp);
-}
-
-extern "C" void _start() {}
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 777d649d3d4d7..3d713368251f6 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -1517,8 +1517,6 @@ add_entrypoint_object(
DEPENDS
libc.src.__support.macros.properties.types
libc.src.__support.FPUtil.manipulation_functions
- FLAGS
- MISC_BASIC_OPS_OPT
)
add_entrypoint_object(
diff --git a/libc/src/math/generic/frexpf16.cpp b/libc/src/math/generic/frexpf16.cpp
index 342fdd39046f8..4571b0d0ea389 100644
--- a/libc/src/math/generic/frexpf16.cpp
+++ b/libc/src/math/generic/frexpf16.cpp
@@ -14,11 +14,7 @@
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(float16, frexpf16, (float16 x, int *exp)) {
-#ifdef __LIBC_USE_BUILTIN_FREXPF16
- return __builtin_frexpf16(x, exp);
-#else
return fputil::frexp(x, *exp);
-#endif
}
} // namespace LIBC_NAMESPACE_DECL
More information about the libc-commits
mailing list