[libc-commits] [libc] [libc][math] Optimize copysign{, f, f16} and fabs{, f, f16} with builtins when available (PR #99037)

Mon Jul 22 08:30:54 PDT 2024

https://github.com/overmighty updated https://github.com/llvm/llvm-project/pull/99037

>From 9554a0b45940abfd27066d8f79f70f7001b19d91 Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Tue, 16 Jul 2024 13:52:05 +0200
Subject: [PATCH 1/9] [libc][math] Optimize misc basic ops using builtins when
 available

---
 .../cmake/modules/CheckCompilerFeatures.cmake |   6 +
 .../modules/LLVMLibCCompileOptionRules.cmake  |  10 ++
 libc/cmake/modules/LLVMLibCFlagRules.cmake    |   3 +
 .../check_builtin_fmax_fmin.cpp               |  14 ++
 .../check_builtin_frexpf16.cpp                |   5 +
 libc/src/math/generic/CMakeLists.txt          |  26 +++
 libc/src/math/generic/copysign.cpp            |   4 +
 libc/src/math/generic/copysignf.cpp           |   4 +
 libc/src/math/generic/copysignf16.cpp         |   4 +
 libc/src/math/generic/fabs.cpp                |   8 +-
 libc/src/math/generic/fabsf.cpp               |   8 +-
 libc/src/math/generic/fabsf16.cpp             |   8 +-
 libc/src/math/generic/fmaximum_num.cpp        |   6 +-
 libc/src/math/generic/fmaximum_numf.cpp       |   6 +-
 libc/src/math/generic/fmaximum_numf16.cpp     |   4 +
 libc/src/math/generic/fminimum_num.cpp        |   6 +-
 libc/src/math/generic/fminimum_numf.cpp       |   6 +-
 libc/src/math/generic/fminimum_numf16.cpp     |   4 +
 libc/src/math/generic/frexpf16.cpp            |   4 +
 .../BinaryOpSingleOutputPerf.h                |   7 +-
 .../math/performance_testing/CMakeLists.txt   |  20 +++
 .../SingleInputSingleOutputPerf.h             |  44 +++--
 .../misc_basic_ops_perf.cpp                   | 153 ++++++++++++++++++
 23 files changed, 335 insertions(+), 25 deletions(-)
 create mode 100644 libc/cmake/modules/compiler_features/check_builtin_fmax_fmin.cpp
 create mode 100644 libc/cmake/modules/compiler_features/check_builtin_frexpf16.cpp
 create mode 100644 libc/test/src/math/performance_testing/misc_basic_ops_perf.cpp

diff --git a/libc/cmake/modules/CheckCompilerFeatures.cmake b/libc/cmake/modules/CheckCompilerFeatures.cmake
index a6d793d495c45..a10b88fc2acaa 100644
--- a/libc/cmake/modules/CheckCompilerFeatures.cmake
+++ b/libc/cmake/modules/CheckCompilerFeatures.cmake
@@ -5,6 +5,8 @@
 set(
   ALL_COMPILER_FEATURES
     "builtin_ceil_floor_rint_trunc"
+    "builtin_fmax_fmin"
+    "builtin_frexpf16"
     "builtin_round"
     "builtin_roundeven"
     "float16"
@@ -82,6 +84,10 @@ foreach(feature IN LISTS ALL_COMPILER_FEATURES)
       set(LIBC_COMPILER_HAS_FIXED_POINT TRUE)
     elseif(${feature} STREQUAL "builtin_ceil_floor_rint_trunc")
       set(LIBC_COMPILER_HAS_BUILTIN_CEIL_FLOOR_RINT_TRUNC TRUE)
+    elseif(${feature} STREQUAL "builtin_fmax_fmin")
+      set(LIBC_COMPILER_HAS_BUILTIN_FMAX_FMIN TRUE)
+    elseif(${feature} STREQUAL "builtin_frexpf16")
+      set(LIBC_COMPILER_HAS_BUILTIN_FREXPF16 TRUE)
     elseif(${feature} STREQUAL "builtin_round")
       set(LIBC_COMPILER_HAS_BUILTIN_ROUND TRUE)
     elseif(${feature} STREQUAL "builtin_roundeven")
diff --git a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
index 253da4ae890e5..047468481c2f6 100644
--- a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
+++ b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
@@ -6,6 +6,7 @@ function(_get_compile_options_from_flags output_var)
   endif()
   check_flag(ADD_ROUND_OPT_FLAG ${ROUND_OPT_FLAG} ${ARGN})
   check_flag(ADD_EXPLICIT_SIMD_OPT_FLAG ${EXPLICIT_SIMD_OPT_FLAG} ${ARGN})
+  check_flag(ADD_MISC_MATH_BASIC_OPS_OPT_FLAG ${MISC_MATH_BASIC_OPS_OPT_FLAG} ${ARGN})
 
   if(LLVM_COMPILER_IS_GCC_COMPATIBLE)
     if(ADD_FMA_FLAG)
@@ -37,6 +38,15 @@ function(_get_compile_options_from_flags output_var)
     if(ADD_EXPLICIT_SIMD_OPT_FLAG)
       list(APPEND compile_options "-D__LIBC_EXPLICIT_SIMD_OPT")
     endif()
+    if(ADD_MISC_MATH_BASIC_OPS_OPT_FLAG)
+      list(APPEND compile_options "-D__LIBC_MISC_MATH_BASIC_OPS_OPT")
+      if(LIBC_COMPILER_HAS_BUILTIN_FMAX_FMIN)
+        list(APPEND compile_options "-D__LIBC_USE_BUILTIN_FMAX_FMIN")
+      endif()
+      if(LIBC_COMPILER_HAS_BUILTIN_FREXPF16)
+        list(APPEND compile_options "-D__LIBC_USE_BUILTIN_FREXPF16")
+      endif()
+    endif()
   elseif(MSVC)
     if(ADD_FMA_FLAG)
       list(APPEND compile_options "/arch:AVX2")
diff --git a/libc/cmake/modules/LLVMLibCFlagRules.cmake b/libc/cmake/modules/LLVMLibCFlagRules.cmake
index eca7ba8d183e6..4398fe55db5aa 100644
--- a/libc/cmake/modules/LLVMLibCFlagRules.cmake
+++ b/libc/cmake/modules/LLVMLibCFlagRules.cmake
@@ -263,6 +263,9 @@ set(FMA_OPT_FLAG "FMA_OPT")
 set(ROUND_OPT_FLAG "ROUND_OPT")
 # This flag controls whether we use explicit SIMD instructions or not.
 set(EXPLICIT_SIMD_OPT_FLAG "EXPLICIT_SIMD_OPT")
+# This flag controls whether we use compiler builtin functions to implement
+# various basic math operations or not.
+set(MISC_MATH_BASIC_OPS_OPT_FLAG "MISC_MATH_BASIC_OPS_OPT")
 
 # Skip FMA_OPT flag for targets that don't support fma.
 if(NOT((LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "FMA")) OR
diff --git a/libc/cmake/modules/compiler_features/check_builtin_fmax_fmin.cpp b/libc/cmake/modules/compiler_features/check_builtin_fmax_fmin.cpp
new file mode 100644
index 0000000000000..a962df33e31c4
--- /dev/null
+++ b/libc/cmake/modules/compiler_features/check_builtin_fmax_fmin.cpp
@@ -0,0 +1,14 @@
+_Float16 try_builtin_fmaxf16(_Float16 x, _Float16 y) {
+  return __builtin_fmaxf16(x, y);
+}
+_Float16 try_builtin_fminf16(_Float16 x, _Float16 y) {
+  return __builtin_fminf16(x, y);
+}
+
+float try_builtin_fmaxf(float x, float y) { return __builtin_fmaxf(x, y); }
+float try_builtin_fminf(float x, float y) { return __builtin_fminf(x, y); }
+
+double try_builtin_fmax(double x, double y) { return __builtin_fmax(x, y); }
+double try_builtin_fmin(double x, double y) { return __builtin_fmin(x, y); }
+
+extern "C" void _start() {}
diff --git a/libc/cmake/modules/compiler_features/check_builtin_frexpf16.cpp b/libc/cmake/modules/compiler_features/check_builtin_frexpf16.cpp
new file mode 100644
index 0000000000000..e65ef54c1a298
--- /dev/null
+++ b/libc/cmake/modules/compiler_features/check_builtin_frexpf16.cpp
@@ -0,0 +1,5 @@
+_Float16 try_builtin_frexpf16(_Float16 x, int *exp) {
+  return __builtin_frexpf16(x, exp);
+}
+
+extern "C" void _start() {}
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index c2f58fb1a4f71..777d649d3d4d7 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -400,6 +400,8 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
     -O2
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -412,6 +414,8 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
     -O2
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -437,6 +441,8 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
     -O3
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -1406,6 +1412,8 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.manipulation_functions
   COMPILE_OPTIONS
     -O3
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -1418,6 +1426,8 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.manipulation_functions
   COMPILE_OPTIONS
     -O3
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -1443,6 +1453,8 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.manipulation_functions
   COMPILE_OPTIONS
     -O3
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -1505,6 +1517,8 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.macros.properties.types
     libc.src.__support.FPUtil.manipulation_functions
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -2202,6 +2216,8 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
     -O2
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -2214,6 +2230,8 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
     -O2
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -2239,6 +2257,8 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
     -O3
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -2450,6 +2470,8 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
     -O2
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -2462,6 +2484,8 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
     -O2
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -2487,6 +2511,8 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
     -O3
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
diff --git a/libc/src/math/generic/copysign.cpp b/libc/src/math/generic/copysign.cpp
index 149d725af08e2..186bb2c5983f4 100644
--- a/libc/src/math/generic/copysign.cpp
+++ b/libc/src/math/generic/copysign.cpp
@@ -14,7 +14,11 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(double, copysign, (double x, double y)) {
+#ifdef __LIBC_MISC_MATH_BASIC_OPS_OPT
+  return __builtin_copysign(x, y);
+#else
   return fputil::copysign(x, y);
+#endif
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/copysignf.cpp b/libc/src/math/generic/copysignf.cpp
index 17cd70d37c308..c79e50b61ebda 100644
--- a/libc/src/math/generic/copysignf.cpp
+++ b/libc/src/math/generic/copysignf.cpp
@@ -14,7 +14,11 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float, copysignf, (float x, float y)) {
+#ifdef __LIBC_MISC_MATH_BASIC_OPS_OPT
+  return __builtin_copysignf(x, y);
+#else
   return fputil::copysign(x, y);
+#endif
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/copysignf16.cpp b/libc/src/math/generic/copysignf16.cpp
index 42695b3b4a6de..546622f049ebe 100644
--- a/libc/src/math/generic/copysignf16.cpp
+++ b/libc/src/math/generic/copysignf16.cpp
@@ -14,7 +14,11 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float16, copysignf16, (float16 x, float16 y)) {
+#ifdef __LIBC_MISC_MATH_BASIC_OPS_OPT
+  return __builtin_copysignf16(x, y);
+#else
   return fputil::copysign(x, y);
+#endif
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fabs.cpp b/libc/src/math/generic/fabs.cpp
index 472297aecb2f7..55fa958cd7c00 100644
--- a/libc/src/math/generic/fabs.cpp
+++ b/libc/src/math/generic/fabs.cpp
@@ -13,6 +13,12 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
-LLVM_LIBC_FUNCTION(double, fabs, (double x)) { return fputil::abs(x); }
+LLVM_LIBC_FUNCTION(double, fabs, (double x)) {
+#ifdef __LIBC_MISC_MATH_BASIC_OPS_OPT
+  return __builtin_fabs(x);
+#else
+  return fputil::abs(x);
+#endif
+}
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fabsf.cpp b/libc/src/math/generic/fabsf.cpp
index ad4fcb30c795d..2ba18d09bbd5b 100644
--- a/libc/src/math/generic/fabsf.cpp
+++ b/libc/src/math/generic/fabsf.cpp
@@ -13,6 +13,12 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
-LLVM_LIBC_FUNCTION(float, fabsf, (float x)) { return fputil::abs(x); }
+LLVM_LIBC_FUNCTION(float, fabsf, (float x)) {
+#ifdef __LIBC_MISC_MATH_BASIC_OPS_OPT
+  return __builtin_fabsf(x);
+#else
+  return fputil::abs(x);
+#endif
+}
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fabsf16.cpp b/libc/src/math/generic/fabsf16.cpp
index 57671fb6067e2..2f982517614c4 100644
--- a/libc/src/math/generic/fabsf16.cpp
+++ b/libc/src/math/generic/fabsf16.cpp
@@ -13,6 +13,12 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
-LLVM_LIBC_FUNCTION(float16, fabsf16, (float16 x)) { return fputil::abs(x); }
+LLVM_LIBC_FUNCTION(float16, fabsf16, (float16 x)) {
+#ifdef __LIBC_MISC_MATH_BASIC_OPS_OPT
+  return __builtin_fabsf16(x);
+#else
+  return fputil::abs(x);
+#endif
+}
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fmaximum_num.cpp b/libc/src/math/generic/fmaximum_num.cpp
index 33df7daa380df..1bfc1514393ee 100644
--- a/libc/src/math/generic/fmaximum_num.cpp
+++ b/libc/src/math/generic/fmaximum_num.cpp
@@ -1,4 +1,4 @@
-//===-- Implementation of fmaximum_num function----------------------------===//
+//===-- Implementation of fmaximum_num function ---------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -14,7 +14,11 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(double, fmaximum_num, (double x, double y)) {
+#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
+  return __builtin_fmax(x, y);
+#else
   return fputil::fmaximum_num(x, y);
+#endif
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fmaximum_numf.cpp b/libc/src/math/generic/fmaximum_numf.cpp
index 1577080ba2c25..f8c69fa78be3d 100644
--- a/libc/src/math/generic/fmaximum_numf.cpp
+++ b/libc/src/math/generic/fmaximum_numf.cpp
@@ -1,4 +1,4 @@
-//===-- Implementation of fmaximum_numf function---------------------------===//
+//===-- Implementation of fmaximum_numf function --------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -14,7 +14,11 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float, fmaximum_numf, (float x, float y)) {
+#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
+  return __builtin_fmaxf(x, y);
+#else
   return fputil::fmaximum_num(x, y);
+#endif
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fmaximum_numf16.cpp b/libc/src/math/generic/fmaximum_numf16.cpp
index 394ce8b5fe4f3..6a012d38abea4 100644
--- a/libc/src/math/generic/fmaximum_numf16.cpp
+++ b/libc/src/math/generic/fmaximum_numf16.cpp
@@ -14,7 +14,11 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float16, fmaximum_numf16, (float16 x, float16 y)) {
+#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
+  return __builtin_fmaxf16(x, y);
+#else
   return fputil::fmaximum_num(x, y);
+#endif
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fminimum_num.cpp b/libc/src/math/generic/fminimum_num.cpp
index 3ff79def58075..5b9c426ca50c2 100644
--- a/libc/src/math/generic/fminimum_num.cpp
+++ b/libc/src/math/generic/fminimum_num.cpp
@@ -1,4 +1,4 @@
-//===-- Implementation of fminimum_num function----------------------------===//
+//===-- Implementation of fminimum_num function ---------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -14,7 +14,11 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(double, fminimum_num, (double x, double y)) {
+#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
+  return __builtin_fmin(x, y);
+#else
   return fputil::fminimum_num(x, y);
+#endif
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fminimum_numf.cpp b/libc/src/math/generic/fminimum_numf.cpp
index c7ac99b14bd5a..6b6f905e63de3 100644
--- a/libc/src/math/generic/fminimum_numf.cpp
+++ b/libc/src/math/generic/fminimum_numf.cpp
@@ -1,4 +1,4 @@
-//===-- Implementation of fminimum_numf function---------------------------===//
+//===-- Implementation of fminimum_numf function --------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -14,7 +14,11 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float, fminimum_numf, (float x, float y)) {
+#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
+  return __builtin_fminf(x, y);
+#else
   return fputil::fminimum_num(x, y);
+#endif
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fminimum_numf16.cpp b/libc/src/math/generic/fminimum_numf16.cpp
index 0af7205713c10..8e48aaf27070f 100644
--- a/libc/src/math/generic/fminimum_numf16.cpp
+++ b/libc/src/math/generic/fminimum_numf16.cpp
@@ -14,7 +14,11 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float16, fminimum_numf16, (float16 x, float16 y)) {
+#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
+  return __builtin_fminf16(x, y);
+#else
   return fputil::fminimum_num(x, y);
+#endif
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/frexpf16.cpp b/libc/src/math/generic/frexpf16.cpp
index 4571b0d0ea389..342fdd39046f8 100644
--- a/libc/src/math/generic/frexpf16.cpp
+++ b/libc/src/math/generic/frexpf16.cpp
@@ -14,7 +14,11 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float16, frexpf16, (float16 x, int *exp)) {
+#ifdef __LIBC_USE_BUILTIN_FREXPF16
+  return __builtin_frexpf16(x, exp);
+#else
   return fputil::frexp(x, *exp);
+#endif
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
index 995e41ba84b03..1ab0afbc9cbe8 100644
--- a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
+++ b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
@@ -10,6 +10,7 @@
 #include "src/__support/macros/config.h"
 #include "test/src/math/performance_testing/Timer.h"
 
+#include <algorithm>
 #include <cstddef>
 #include <fstream>
 
@@ -28,11 +29,11 @@ template <typename T> class BinaryOpSingleOutputPerf {
   static void run_perf_in_range(Func myFunc, Func otherFunc,
                                 StorageType startingBit, StorageType endingBit,
                                 size_t N, size_t rounds, std::ofstream &log) {
-    if (endingBit - startingBit < N)
-      N = endingBit - startingBit;
+    if (sizeof(StorageType) <= sizeof(size_t))
+      N = std::min(N, static_cast<size_t>(endingBit - startingBit));
 
     auto runner = [=](Func func) {
-      volatile T result;
+      [[maybe_unused]] volatile T result;
       if (endingBit < startingBit) {
         return;
       }
diff --git a/libc/test/src/math/performance_testing/CMakeLists.txt b/libc/test/src/math/performance_testing/CMakeLists.txt
index a75becba04d07..a4059c8ff4dd8 100644
--- a/libc/test/src/math/performance_testing/CMakeLists.txt
+++ b/libc/test/src/math/performance_testing/CMakeLists.txt
@@ -402,3 +402,23 @@ add_perf_binary(
   LINK_LIBRARIES
     LibcFPTestHelpers
 )
+
+add_perf_binary(
+  misc_basic_ops_perf
+  SRCS
+    misc_basic_ops_perf.cpp
+  DEPENDS
+    .binary_op_single_output_diff
+    .single_input_single_output_diff
+    libc.src.math.copysignf
+    libc.src.math.copysignf16
+    libc.src.math.fabsf
+    libc.src.math.fabsf16
+    libc.src.math.fmaximum_numf
+    libc.src.math.fmaximum_numf16
+    libc.src.math.fminimum_numf
+    libc.src.math.fminimum_numf16
+    libc.src.math.frexpf16
+  COMPILE_OPTIONS
+    -fno-builtin
+)
diff --git a/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h b/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h
index 48ae43d6315e3..e0beb729cb9f5 100644
--- a/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h
+++ b/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h
@@ -10,6 +10,7 @@
 #include "src/__support/macros/config.h"
 #include "test/src/math/performance_testing/Timer.h"
 
+#include <algorithm>
 #include <fstream>
 
 namespace LIBC_NAMESPACE_DECL {
@@ -26,16 +27,21 @@ template <typename T> class SingleInputSingleOutputPerf {
 
   static void runPerfInRange(Func myFunc, Func otherFunc,
                              StorageType startingBit, StorageType endingBit,
-                             std::ofstream &log) {
+                             size_t rounds, std::ofstream &log) {
+    size_t n = 10'010'001;
+    if (sizeof(StorageType) <= sizeof(size_t))
+      n = std::min(n, static_cast<size_t>(endingBit - startingBit));
+
     auto runner = [=](Func func) {
-      constexpr StorageType N = 10'010'001;
-      StorageType step = (endingBit - startingBit) / N;
+      StorageType step = (endingBit - startingBit) / n;
       if (step == 0)
         step = 1;
-      volatile T result;
-      for (StorageType bits = startingBit; bits < endingBit; bits += step) {
-        T x = FPBits(bits).get_val();
-        result = func(x);
+      [[maybe_unused]] volatile T result;
+      for (size_t i = 0; i < rounds; i++) {
+        for (StorageType bits = startingBit; bits < endingBit; bits += step) {
+          T x = FPBits(bits).get_val();
+          result = func(x);
+        }
       }
     };
 
@@ -44,8 +50,7 @@ template <typename T> class SingleInputSingleOutputPerf {
     runner(myFunc);
     timer.stop();
 
-    StorageType numberOfRuns = endingBit - startingBit + 1;
-    double myAverage = static_cast<double>(timer.nanoseconds()) / numberOfRuns;
+    double myAverage = static_cast<double>(timer.nanoseconds()) / n / rounds;
     log << "-- My function --\n";
     log << "     Total time      : " << timer.nanoseconds() << " ns \n";
     log << "     Average runtime : " << myAverage << " ns/op \n";
@@ -56,8 +61,7 @@ template <typename T> class SingleInputSingleOutputPerf {
     runner(otherFunc);
     timer.stop();
 
-    double otherAverage =
-        static_cast<double>(timer.nanoseconds()) / numberOfRuns;
+    double otherAverage = static_cast<double>(timer.nanoseconds()) / n / rounds;
     log << "-- Other function --\n";
     log << "     Total time      : " << timer.nanoseconds() << " ns \n";
     log << "     Average runtime : " << otherAverage << " ns/op \n";
@@ -68,15 +72,18 @@ template <typename T> class SingleInputSingleOutputPerf {
     log << "     Mine / Other's  : " << myAverage / otherAverage << " \n";
   }
 
-  static void runPerf(Func myFunc, Func otherFunc, const char *logFile) {
+  static void runPerf(Func myFunc, Func otherFunc, size_t rounds,
+                      const char *logFile) {
     std::ofstream log(logFile);
     log << " Performance tests with inputs in denormal range:\n";
     runPerfInRange(myFunc, otherFunc, /* startingBit= */ StorageType(0),
-                   /* endingBit= */ FPBits::max_subnormal().uintval(), log);
+                   /* endingBit= */ FPBits::max_subnormal().uintval(), rounds,
+                   log);
     log << "\n Performance tests with inputs in normal range:\n";
     runPerfInRange(myFunc, otherFunc,
                    /* startingBit= */ FPBits::min_normal().uintval(),
-                   /* endingBit= */ FPBits::max_normal().uintval(), log);
+                   /* endingBit= */ FPBits::max_normal().uintval(), rounds,
+                   log);
   }
 };
 
@@ -86,6 +93,13 @@ template <typename T> class SingleInputSingleOutputPerf {
 #define SINGLE_INPUT_SINGLE_OUTPUT_PERF(T, myFunc, otherFunc, filename)        \
   int main() {                                                                 \
     LIBC_NAMESPACE::testing::SingleInputSingleOutputPerf<T>::runPerf(          \
-        &myFunc, &otherFunc, filename);                                        \
+        &myFunc, &otherFunc, 1, filename);                                     \
     return 0;                                                                  \
   }
+
+#define SINGLE_INPUT_SINGLE_OUTPUT_PERF_EX(T, myFunc, otherFunc, rounds,       \
+                                           filename)                           \
+  {                                                                            \
+    LIBC_NAMESPACE::testing::SingleInputSingleOutputPerf<T>::runPerf(          \
+        &myFunc, &otherFunc, rounds, filename);                                \
+  }
diff --git a/libc/test/src/math/performance_testing/misc_basic_ops_perf.cpp b/libc/test/src/math/performance_testing/misc_basic_ops_perf.cpp
new file mode 100644
index 0000000000000..6c3ee72a4c5d3
--- /dev/null
+++ b/libc/test/src/math/performance_testing/misc_basic_ops_perf.cpp
@@ -0,0 +1,153 @@
+//===-- Performance test for miscellaneous basic operations ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "BinaryOpSingleOutputPerf.h"
+#include "SingleInputSingleOutputPerf.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/math/copysignf.h"
+#include "src/math/copysignf16.h"
+#include "src/math/fabsf.h"
+#include "src/math/fabsf16.h"
+#include "src/math/fmaximum_numf.h"
+#include "src/math/fmaximum_numf16.h"
+#include "src/math/fminimum_numf.h"
+#include "src/math/fminimum_numf16.h"
+#include "src/math/frexpf16.h"
+#include "test/src/math/performance_testing/Timer.h"
+
+#include <algorithm>
+#include <fstream>
+#include <math.h>
+
+namespace LIBC_NAMESPACE::testing {
+
+template <typename T> class FrexpPerf {
+  using FPBits = fputil::FPBits<T>;
+  using StorageType = typename FPBits::StorageType;
+
+public:
+  typedef T Func(T, int *);
+
+  static void run_perf_in_range(Func my_func, Func other_func,
+                                StorageType starting_bit,
+                                StorageType ending_bit, size_t rounds,
+                                std::ofstream &log) {
+    size_t n = 10'010'001;
+    if (sizeof(StorageType) <= sizeof(size_t))
+      n = std::min(n, static_cast<size_t>(ending_bit - starting_bit));
+
+    auto runner = [=](Func func) {
+      StorageType step = (ending_bit - starting_bit) / n;
+      if (step == 0)
+        step = 1;
+      [[maybe_unused]] volatile T result;
+      int result_exp;
+      for (size_t i = 0; i < rounds; i++) {
+        for (StorageType bits = starting_bit; bits < ending_bit; bits += step) {
+          T x = FPBits(bits).get_val();
+          result = func(x, &result_exp);
+        }
+      }
+    };
+
+    Timer timer;
+    timer.start();
+    runner(my_func);
+    timer.stop();
+
+    double my_average = static_cast<double>(timer.nanoseconds()) / n / rounds;
+    log << "-- My function --\n";
+    log << "     Total time      : " << timer.nanoseconds() << " ns \n";
+    log << "     Average runtime : " << my_average << " ns/op \n";
+    log << "     Ops per second  : "
+        << static_cast<uint64_t>(1'000'000'000.0 / my_average) << " op/s \n";
+
+    timer.start();
+    runner(other_func);
+    timer.stop();
+
+    double other_average =
+        static_cast<double>(timer.nanoseconds()) / n / rounds;
+    log << "-- Other function --\n";
+    log << "     Total time      : " << timer.nanoseconds() << " ns \n";
+    log << "     Average runtime : " << other_average << " ns/op \n";
+    log << "     Ops per second  : "
+        << static_cast<uint64_t>(1'000'000'000.0 / other_average) << " op/s \n";
+
+    log << "-- Average runtime ratio --\n";
+    log << "     Mine / Other's  : " << my_average / other_average << " \n";
+  }
+
+  static void run_perf(Func my_func, Func other_func, size_t rounds,
+                       const char *log_file) {
+    std::ofstream log(log_file);
+    log << " Performance tests with inputs in denormal range:\n";
+    run_perf_in_range(my_func, other_func, /* starting_bit= */ StorageType(0),
+                      /* ending_bit= */ FPBits::max_subnormal().uintval(),
+                      rounds, log);
+    log << "\n Performance tests with inputs in normal range:\n";
+    run_perf_in_range(my_func, other_func,
+                      /* starting_bit= */ FPBits::min_normal().uintval(),
+                      /* ending_bit= */ FPBits::max_normal().uintval(), rounds,
+                      log);
+  }
+};
+
+} // namespace LIBC_NAMESPACE::testing
+
+#define FREXP_PERF(T, my_func, other_func, rounds, filename)                   \
+  {                                                                            \
+    LIBC_NAMESPACE::testing::FrexpPerf<T>::run_perf(&my_func, &other_func,     \
+                                                    rounds, filename);         \
+    LIBC_NAMESPACE::testing::FrexpPerf<T>::run_perf(&my_func, &other_func,     \
+                                                    rounds, filename);         \
+  }
+
+static constexpr size_t FLOAT16_ROUNDS = 20'000;
+static constexpr size_t FLOAT_ROUNDS = 40;
+
+// LLVM libc might be the only libc implementation with support for float16 math
+// functions currently. We can't compare our float16 functions against the
+// system libc, so we compare them against this placeholder function.
+float16 placeholder_unaryf16(float16 x) { return x; }
+float16 placeholder_binaryf16(float16 x, float16 y) { return x; }
+float16 placeholder_frexpf16(float16 x, int *exp) { return x; }
+
+// The system libc might not provide the f{max,min}imum_num* C23 math functions
+// either.
+float placeholder_binaryf(float x, float y) { return x; }
+
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF_EX(float16, LIBC_NAMESPACE::fabsf16,
+                                     placeholder_unaryf16, FLOAT16_ROUNDS,
+                                     "fabsf16_perf.log")
+  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, LIBC_NAMESPACE::copysignf16,
+                                  placeholder_binaryf16, FLOAT16_ROUNDS,
+                                  "copysignf16_perf.log")
+  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, LIBC_NAMESPACE::fmaximum_numf16,
+                                  placeholder_binaryf16, FLOAT16_ROUNDS,
+                                  "fmaximum_numf16_perf.log")
+  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, LIBC_NAMESPACE::fminimum_numf16,
+                                  placeholder_binaryf16, FLOAT16_ROUNDS,
+                                  "fminimum_numf16_perf.log")
+  FREXP_PERF(float16, LIBC_NAMESPACE::frexpf16, placeholder_frexpf16,
+             FLOAT16_ROUNDS, "frexpf16_perf.log")
+
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF_EX(float, LIBC_NAMESPACE::fabsf, fabsf,
+                                     FLOAT_ROUNDS, "fabsf_perf.log")
+  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, LIBC_NAMESPACE::copysignf, copysignf,
+                                  FLOAT_ROUNDS, "copysignf_perf.log")
+  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, LIBC_NAMESPACE::fmaximum_numf,
+                                  placeholder_binaryf, FLOAT_ROUNDS,
+                                  "fmaximum_numf_perf.log")
+  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, LIBC_NAMESPACE::fminimum_numf,
+                                  placeholder_binaryf, FLOAT_ROUNDS,
+                                  "fminimum_numf_perf.log")
+
+  return 0;
+}

>From c422169d45ddb0fa83072736017deed681882f71 Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Tue, 16 Jul 2024 14:33:32 +0200
Subject: [PATCH 2/9] fixup! [libc][math] Optimize misc basic ops using
 builtins when available

Remove support for __builtin_frexpf16 as it decreases performance.
---
 libc/cmake/modules/CheckCompilerFeatures.cmake               | 3 ---
 libc/cmake/modules/LLVMLibCCompileOptionRules.cmake          | 3 ---
 .../modules/compiler_features/check_builtin_frexpf16.cpp     | 5 -----
 libc/src/math/generic/CMakeLists.txt                         | 2 --
 libc/src/math/generic/frexpf16.cpp                           | 4 ----
 5 files changed, 17 deletions(-)
 delete mode 100644 libc/cmake/modules/compiler_features/check_builtin_frexpf16.cpp

diff --git a/libc/cmake/modules/CheckCompilerFeatures.cmake b/libc/cmake/modules/CheckCompilerFeatures.cmake
index a10b88fc2acaa..2bab968f901eb 100644
--- a/libc/cmake/modules/CheckCompilerFeatures.cmake
+++ b/libc/cmake/modules/CheckCompilerFeatures.cmake
@@ -6,7 +6,6 @@ set(
   ALL_COMPILER_FEATURES
     "builtin_ceil_floor_rint_trunc"
     "builtin_fmax_fmin"
-    "builtin_frexpf16"
     "builtin_round"
     "builtin_roundeven"
     "float16"
@@ -86,8 +85,6 @@ foreach(feature IN LISTS ALL_COMPILER_FEATURES)
       set(LIBC_COMPILER_HAS_BUILTIN_CEIL_FLOOR_RINT_TRUNC TRUE)
     elseif(${feature} STREQUAL "builtin_fmax_fmin")
       set(LIBC_COMPILER_HAS_BUILTIN_FMAX_FMIN TRUE)
-    elseif(${feature} STREQUAL "builtin_frexpf16")
-      set(LIBC_COMPILER_HAS_BUILTIN_FREXPF16 TRUE)
     elseif(${feature} STREQUAL "builtin_round")
       set(LIBC_COMPILER_HAS_BUILTIN_ROUND TRUE)
     elseif(${feature} STREQUAL "builtin_roundeven")
diff --git a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
index 047468481c2f6..ead578f95ac72 100644
--- a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
+++ b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
@@ -43,9 +43,6 @@ function(_get_compile_options_from_flags output_var)
       if(LIBC_COMPILER_HAS_BUILTIN_FMAX_FMIN)
         list(APPEND compile_options "-D__LIBC_USE_BUILTIN_FMAX_FMIN")
       endif()
-      if(LIBC_COMPILER_HAS_BUILTIN_FREXPF16)
-        list(APPEND compile_options "-D__LIBC_USE_BUILTIN_FREXPF16")
-      endif()
     endif()
   elseif(MSVC)
     if(ADD_FMA_FLAG)
diff --git a/libc/cmake/modules/compiler_features/check_builtin_frexpf16.cpp b/libc/cmake/modules/compiler_features/check_builtin_frexpf16.cpp
deleted file mode 100644
index e65ef54c1a298..0000000000000
--- a/libc/cmake/modules/compiler_features/check_builtin_frexpf16.cpp
+++ /dev/null
@@ -1,5 +0,0 @@
-_Float16 try_builtin_frexpf16(_Float16 x, int *exp) {
-  return __builtin_frexpf16(x, exp);
-}
-
-extern "C" void _start() {}
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 777d649d3d4d7..3d713368251f6 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -1517,8 +1517,6 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.macros.properties.types
     libc.src.__support.FPUtil.manipulation_functions
-  FLAGS
-    MISC_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
diff --git a/libc/src/math/generic/frexpf16.cpp b/libc/src/math/generic/frexpf16.cpp
index 342fdd39046f8..4571b0d0ea389 100644
--- a/libc/src/math/generic/frexpf16.cpp
+++ b/libc/src/math/generic/frexpf16.cpp
@@ -14,11 +14,7 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float16, frexpf16, (float16 x, int *exp)) {
-#ifdef __LIBC_USE_BUILTIN_FREXPF16
-  return __builtin_frexpf16(x, exp);
-#else
   return fputil::frexp(x, *exp);
-#endif
 }
 
 } // namespace LIBC_NAMESPACE_DECL

>From 29b47b2d9096873dbd20ab4e46b8be4664fb6ba5 Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Wed, 17 Jul 2024 12:22:58 +0200
Subject: [PATCH 3/9] fixup! [libc][math] Optimize misc basic ops using
 builtins when available

Split checks for __builtin_{fmax,fmin}f16 into separate feature.
---
 libc/cmake/modules/CheckCompilerFeatures.cmake           | 3 +++
 libc/cmake/modules/LLVMLibCCompileOptionRules.cmake      | 3 +++
 .../compiler_features/check_builtin_fmax_fmin.cpp        | 7 -------
 .../compiler_features/check_builtin_fmaxf16_fminf16.cpp  | 9 +++++++++
 libc/src/math/generic/fmaximum_numf16.cpp                | 2 +-
 libc/src/math/generic/fminimum_numf16.cpp                | 2 +-
 6 files changed, 17 insertions(+), 9 deletions(-)
 create mode 100644 libc/cmake/modules/compiler_features/check_builtin_fmaxf16_fminf16.cpp

diff --git a/libc/cmake/modules/CheckCompilerFeatures.cmake b/libc/cmake/modules/CheckCompilerFeatures.cmake
index 2bab968f901eb..8b02c590f11d0 100644
--- a/libc/cmake/modules/CheckCompilerFeatures.cmake
+++ b/libc/cmake/modules/CheckCompilerFeatures.cmake
@@ -6,6 +6,7 @@ set(
   ALL_COMPILER_FEATURES
     "builtin_ceil_floor_rint_trunc"
     "builtin_fmax_fmin"
+    "builtin_fmaxf16_fminf16"
     "builtin_round"
     "builtin_roundeven"
     "float16"
@@ -85,6 +86,8 @@ foreach(feature IN LISTS ALL_COMPILER_FEATURES)
       set(LIBC_COMPILER_HAS_BUILTIN_CEIL_FLOOR_RINT_TRUNC TRUE)
     elseif(${feature} STREQUAL "builtin_fmax_fmin")
       set(LIBC_COMPILER_HAS_BUILTIN_FMAX_FMIN TRUE)
+    elseif(${feature} STREQUAL "builtin_fmaxf16_fminf16")
+      set(LIBC_COMPILER_HAS_BUILTIN_FMAXF16_FMINF16 TRUE)
     elseif(${feature} STREQUAL "builtin_round")
       set(LIBC_COMPILER_HAS_BUILTIN_ROUND TRUE)
     elseif(${feature} STREQUAL "builtin_roundeven")
diff --git a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
index ead578f95ac72..7f982c1c495c3 100644
--- a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
+++ b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
@@ -43,6 +43,9 @@ function(_get_compile_options_from_flags output_var)
       if(LIBC_COMPILER_HAS_BUILTIN_FMAX_FMIN)
         list(APPEND compile_options "-D__LIBC_USE_BUILTIN_FMAX_FMIN")
       endif()
+      if(LIBC_COMPILER_HAS_BUILTIN_FMAXF16_FMINF16)
+        list(APPEND compile_options "-D__LIBC_USE_BUILTIN_FMAXF16_FMINF16")
+      endif()
     endif()
   elseif(MSVC)
     if(ADD_FMA_FLAG)
diff --git a/libc/cmake/modules/compiler_features/check_builtin_fmax_fmin.cpp b/libc/cmake/modules/compiler_features/check_builtin_fmax_fmin.cpp
index a962df33e31c4..8ec927b4e170d 100644
--- a/libc/cmake/modules/compiler_features/check_builtin_fmax_fmin.cpp
+++ b/libc/cmake/modules/compiler_features/check_builtin_fmax_fmin.cpp
@@ -1,10 +1,3 @@
-_Float16 try_builtin_fmaxf16(_Float16 x, _Float16 y) {
-  return __builtin_fmaxf16(x, y);
-}
-_Float16 try_builtin_fminf16(_Float16 x, _Float16 y) {
-  return __builtin_fminf16(x, y);
-}
-
 float try_builtin_fmaxf(float x, float y) { return __builtin_fmaxf(x, y); }
 float try_builtin_fminf(float x, float y) { return __builtin_fminf(x, y); }
 
diff --git a/libc/cmake/modules/compiler_features/check_builtin_fmaxf16_fminf16.cpp b/libc/cmake/modules/compiler_features/check_builtin_fmaxf16_fminf16.cpp
new file mode 100644
index 0000000000000..4ce377782800c
--- /dev/null
+++ b/libc/cmake/modules/compiler_features/check_builtin_fmaxf16_fminf16.cpp
@@ -0,0 +1,9 @@
+_Float16 try_builtin_fmaxf16(_Float16 x, _Float16 y) {
+  return __builtin_fmaxf16(x, y);
+}
+
+_Float16 try_builtin_fminf16(_Float16 x, _Float16 y) {
+  return __builtin_fminf16(x, y);
+}
+
+extern "C" void _start() {}
diff --git a/libc/src/math/generic/fmaximum_numf16.cpp b/libc/src/math/generic/fmaximum_numf16.cpp
index 6a012d38abea4..b71cc757518da 100644
--- a/libc/src/math/generic/fmaximum_numf16.cpp
+++ b/libc/src/math/generic/fmaximum_numf16.cpp
@@ -14,7 +14,7 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float16, fmaximum_numf16, (float16 x, float16 y)) {
-#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
+#ifdef __LIBC_USE_BUILTIN_FMAXF16_FMINF16
   return __builtin_fmaxf16(x, y);
 #else
   return fputil::fmaximum_num(x, y);
diff --git a/libc/src/math/generic/fminimum_numf16.cpp b/libc/src/math/generic/fminimum_numf16.cpp
index 8e48aaf27070f..e0ec437f9d505 100644
--- a/libc/src/math/generic/fminimum_numf16.cpp
+++ b/libc/src/math/generic/fminimum_numf16.cpp
@@ -14,7 +14,7 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float16, fminimum_numf16, (float16 x, float16 y)) {
-#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
+#ifdef __LIBC_USE_BUILTIN_FMAXF16_FMINF16
   return __builtin_fminf16(x, y);
 #else
   return fputil::fminimum_num(x, y);

>From dda59ea9090603d85270099719d92d4dd6662d52 Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Thu, 18 Jul 2024 19:54:13 +0200
Subject: [PATCH 4/9] fixup! [libc][math] Optimize misc basic ops using
 builtins when available

Revert "Split checks for __builtin_{fmax,fmin}f16 into separate feature."
---
 libc/cmake/modules/CheckCompilerFeatures.cmake           | 3 ---
 libc/cmake/modules/LLVMLibCCompileOptionRules.cmake      | 3 ---
 .../compiler_features/check_builtin_fmax_fmin.cpp        | 7 +++++++
 .../compiler_features/check_builtin_fmaxf16_fminf16.cpp  | 9 ---------
 libc/src/math/generic/fmaximum_numf16.cpp                | 2 +-
 libc/src/math/generic/fminimum_numf16.cpp                | 2 +-
 6 files changed, 9 insertions(+), 17 deletions(-)
 delete mode 100644 libc/cmake/modules/compiler_features/check_builtin_fmaxf16_fminf16.cpp

diff --git a/libc/cmake/modules/CheckCompilerFeatures.cmake b/libc/cmake/modules/CheckCompilerFeatures.cmake
index 8b02c590f11d0..2bab968f901eb 100644
--- a/libc/cmake/modules/CheckCompilerFeatures.cmake
+++ b/libc/cmake/modules/CheckCompilerFeatures.cmake
@@ -6,7 +6,6 @@ set(
   ALL_COMPILER_FEATURES
     "builtin_ceil_floor_rint_trunc"
     "builtin_fmax_fmin"
-    "builtin_fmaxf16_fminf16"
     "builtin_round"
     "builtin_roundeven"
     "float16"
@@ -86,8 +85,6 @@ foreach(feature IN LISTS ALL_COMPILER_FEATURES)
       set(LIBC_COMPILER_HAS_BUILTIN_CEIL_FLOOR_RINT_TRUNC TRUE)
     elseif(${feature} STREQUAL "builtin_fmax_fmin")
       set(LIBC_COMPILER_HAS_BUILTIN_FMAX_FMIN TRUE)
-    elseif(${feature} STREQUAL "builtin_fmaxf16_fminf16")
-      set(LIBC_COMPILER_HAS_BUILTIN_FMAXF16_FMINF16 TRUE)
     elseif(${feature} STREQUAL "builtin_round")
       set(LIBC_COMPILER_HAS_BUILTIN_ROUND TRUE)
     elseif(${feature} STREQUAL "builtin_roundeven")
diff --git a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
index 7f982c1c495c3..ead578f95ac72 100644
--- a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
+++ b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
@@ -43,9 +43,6 @@ function(_get_compile_options_from_flags output_var)
       if(LIBC_COMPILER_HAS_BUILTIN_FMAX_FMIN)
         list(APPEND compile_options "-D__LIBC_USE_BUILTIN_FMAX_FMIN")
       endif()
-      if(LIBC_COMPILER_HAS_BUILTIN_FMAXF16_FMINF16)
-        list(APPEND compile_options "-D__LIBC_USE_BUILTIN_FMAXF16_FMINF16")
-      endif()
     endif()
   elseif(MSVC)
     if(ADD_FMA_FLAG)
diff --git a/libc/cmake/modules/compiler_features/check_builtin_fmax_fmin.cpp b/libc/cmake/modules/compiler_features/check_builtin_fmax_fmin.cpp
index 8ec927b4e170d..a962df33e31c4 100644
--- a/libc/cmake/modules/compiler_features/check_builtin_fmax_fmin.cpp
+++ b/libc/cmake/modules/compiler_features/check_builtin_fmax_fmin.cpp
@@ -1,3 +1,10 @@
+_Float16 try_builtin_fmaxf16(_Float16 x, _Float16 y) {
+  return __builtin_fmaxf16(x, y);
+}
+_Float16 try_builtin_fminf16(_Float16 x, _Float16 y) {
+  return __builtin_fminf16(x, y);
+}
+
 float try_builtin_fmaxf(float x, float y) { return __builtin_fmaxf(x, y); }
 float try_builtin_fminf(float x, float y) { return __builtin_fminf(x, y); }
 
diff --git a/libc/cmake/modules/compiler_features/check_builtin_fmaxf16_fminf16.cpp b/libc/cmake/modules/compiler_features/check_builtin_fmaxf16_fminf16.cpp
deleted file mode 100644
index 4ce377782800c..0000000000000
--- a/libc/cmake/modules/compiler_features/check_builtin_fmaxf16_fminf16.cpp
+++ /dev/null
@@ -1,9 +0,0 @@
-_Float16 try_builtin_fmaxf16(_Float16 x, _Float16 y) {
-  return __builtin_fmaxf16(x, y);
-}
-
-_Float16 try_builtin_fminf16(_Float16 x, _Float16 y) {
-  return __builtin_fminf16(x, y);
-}
-
-extern "C" void _start() {}
diff --git a/libc/src/math/generic/fmaximum_numf16.cpp b/libc/src/math/generic/fmaximum_numf16.cpp
index b71cc757518da..6a012d38abea4 100644
--- a/libc/src/math/generic/fmaximum_numf16.cpp
+++ b/libc/src/math/generic/fmaximum_numf16.cpp
@@ -14,7 +14,7 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float16, fmaximum_numf16, (float16 x, float16 y)) {
-#ifdef __LIBC_USE_BUILTIN_FMAXF16_FMINF16
+#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
   return __builtin_fmaxf16(x, y);
 #else
   return fputil::fmaximum_num(x, y);
diff --git a/libc/src/math/generic/fminimum_numf16.cpp b/libc/src/math/generic/fminimum_numf16.cpp
index e0ec437f9d505..8e48aaf27070f 100644
--- a/libc/src/math/generic/fminimum_numf16.cpp
+++ b/libc/src/math/generic/fminimum_numf16.cpp
@@ -14,7 +14,7 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float16, fminimum_numf16, (float16 x, float16 y)) {
-#ifdef __LIBC_USE_BUILTIN_FMAXF16_FMINF16
+#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
   return __builtin_fminf16(x, y);
 #else
   return fputil::fminimum_num(x, y);

>From d405f8b09f4ac20e6072df64c36e76fa768d8d07 Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Thu, 18 Jul 2024 20:03:39 +0200
Subject: [PATCH 5/9] fixup! [libc][math] Optimize misc basic ops using
 builtins when available

Remove support for __builtin_{fmax,fmin}* due to incorrect results on x86-64 and AArch64.
---
 libc/cmake/modules/CheckCompilerFeatures.cmake   |  3 ---
 .../modules/LLVMLibCCompileOptionRules.cmake     |  3 ---
 .../check_builtin_fmax_fmin.cpp                  | 14 --------------
 libc/src/math/generic/CMakeLists.txt             | 12 ------------
 libc/src/math/generic/fmaximum_num.cpp           |  6 +-----
 libc/src/math/generic/fmaximum_numf.cpp          |  6 +-----
 libc/src/math/generic/fmaximum_numf16.cpp        |  4 ----
 libc/src/math/generic/fminimum_num.cpp           |  6 +-----
 libc/src/math/generic/fminimum_numf.cpp          |  6 +-----
 libc/src/math/generic/fminimum_numf16.cpp        |  4 ----
 .../src/math/performance_testing/CMakeLists.txt  |  4 ----
 .../performance_testing/misc_basic_ops_perf.cpp  | 16 ----------------
 12 files changed, 4 insertions(+), 80 deletions(-)
 delete mode 100644 libc/cmake/modules/compiler_features/check_builtin_fmax_fmin.cpp

diff --git a/libc/cmake/modules/CheckCompilerFeatures.cmake b/libc/cmake/modules/CheckCompilerFeatures.cmake
index 2bab968f901eb..a6d793d495c45 100644
--- a/libc/cmake/modules/CheckCompilerFeatures.cmake
+++ b/libc/cmake/modules/CheckCompilerFeatures.cmake
@@ -5,7 +5,6 @@
 set(
   ALL_COMPILER_FEATURES
     "builtin_ceil_floor_rint_trunc"
-    "builtin_fmax_fmin"
     "builtin_round"
     "builtin_roundeven"
     "float16"
@@ -83,8 +82,6 @@ foreach(feature IN LISTS ALL_COMPILER_FEATURES)
       set(LIBC_COMPILER_HAS_FIXED_POINT TRUE)
     elseif(${feature} STREQUAL "builtin_ceil_floor_rint_trunc")
       set(LIBC_COMPILER_HAS_BUILTIN_CEIL_FLOOR_RINT_TRUNC TRUE)
-    elseif(${feature} STREQUAL "builtin_fmax_fmin")
-      set(LIBC_COMPILER_HAS_BUILTIN_FMAX_FMIN TRUE)
     elseif(${feature} STREQUAL "builtin_round")
       set(LIBC_COMPILER_HAS_BUILTIN_ROUND TRUE)
     elseif(${feature} STREQUAL "builtin_roundeven")
diff --git a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
index ead578f95ac72..c1b3ab6831dab 100644
--- a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
+++ b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
@@ -40,9 +40,6 @@ function(_get_compile_options_from_flags output_var)
     endif()
     if(ADD_MISC_MATH_BASIC_OPS_OPT_FLAG)
       list(APPEND compile_options "-D__LIBC_MISC_MATH_BASIC_OPS_OPT")
-      if(LIBC_COMPILER_HAS_BUILTIN_FMAX_FMIN)
-        list(APPEND compile_options "-D__LIBC_USE_BUILTIN_FMAX_FMIN")
-      endif()
     endif()
   elseif(MSVC)
     if(ADD_FMA_FLAG)
diff --git a/libc/cmake/modules/compiler_features/check_builtin_fmax_fmin.cpp b/libc/cmake/modules/compiler_features/check_builtin_fmax_fmin.cpp
deleted file mode 100644
index a962df33e31c4..0000000000000
--- a/libc/cmake/modules/compiler_features/check_builtin_fmax_fmin.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-_Float16 try_builtin_fmaxf16(_Float16 x, _Float16 y) {
-  return __builtin_fmaxf16(x, y);
-}
-_Float16 try_builtin_fminf16(_Float16 x, _Float16 y) {
-  return __builtin_fminf16(x, y);
-}
-
-float try_builtin_fmaxf(float x, float y) { return __builtin_fmaxf(x, y); }
-float try_builtin_fminf(float x, float y) { return __builtin_fminf(x, y); }
-
-double try_builtin_fmax(double x, double y) { return __builtin_fmax(x, y); }
-double try_builtin_fmin(double x, double y) { return __builtin_fmin(x, y); }
-
-extern "C" void _start() {}
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 3d713368251f6..568ee245fa74e 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -2214,8 +2214,6 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
     -O2
-  FLAGS
-    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -2228,8 +2226,6 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
     -O2
-  FLAGS
-    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -2255,8 +2251,6 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
     -O3
-  FLAGS
-    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -2468,8 +2462,6 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
     -O2
-  FLAGS
-    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -2482,8 +2474,6 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
     -O2
-  FLAGS
-    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -2509,8 +2499,6 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
     -O3
-  FLAGS
-    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
diff --git a/libc/src/math/generic/fmaximum_num.cpp b/libc/src/math/generic/fmaximum_num.cpp
index 1bfc1514393ee..33df7daa380df 100644
--- a/libc/src/math/generic/fmaximum_num.cpp
+++ b/libc/src/math/generic/fmaximum_num.cpp
@@ -1,4 +1,4 @@
-//===-- Implementation of fmaximum_num function ---------------------------===//
+//===-- Implementation of fmaximum_num function----------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -14,11 +14,7 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(double, fmaximum_num, (double x, double y)) {
-#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
-  return __builtin_fmax(x, y);
-#else
   return fputil::fmaximum_num(x, y);
-#endif
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fmaximum_numf.cpp b/libc/src/math/generic/fmaximum_numf.cpp
index f8c69fa78be3d..1577080ba2c25 100644
--- a/libc/src/math/generic/fmaximum_numf.cpp
+++ b/libc/src/math/generic/fmaximum_numf.cpp
@@ -1,4 +1,4 @@
-//===-- Implementation of fmaximum_numf function --------------------------===//
+//===-- Implementation of fmaximum_numf function---------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -14,11 +14,7 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float, fmaximum_numf, (float x, float y)) {
-#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
-  return __builtin_fmaxf(x, y);
-#else
   return fputil::fmaximum_num(x, y);
-#endif
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fmaximum_numf16.cpp b/libc/src/math/generic/fmaximum_numf16.cpp
index 6a012d38abea4..394ce8b5fe4f3 100644
--- a/libc/src/math/generic/fmaximum_numf16.cpp
+++ b/libc/src/math/generic/fmaximum_numf16.cpp
@@ -14,11 +14,7 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float16, fmaximum_numf16, (float16 x, float16 y)) {
-#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
-  return __builtin_fmaxf16(x, y);
-#else
   return fputil::fmaximum_num(x, y);
-#endif
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fminimum_num.cpp b/libc/src/math/generic/fminimum_num.cpp
index 5b9c426ca50c2..3ff79def58075 100644
--- a/libc/src/math/generic/fminimum_num.cpp
+++ b/libc/src/math/generic/fminimum_num.cpp
@@ -1,4 +1,4 @@
-//===-- Implementation of fminimum_num function ---------------------------===//
+//===-- Implementation of fminimum_num function----------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -14,11 +14,7 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(double, fminimum_num, (double x, double y)) {
-#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
-  return __builtin_fmin(x, y);
-#else
   return fputil::fminimum_num(x, y);
-#endif
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fminimum_numf.cpp b/libc/src/math/generic/fminimum_numf.cpp
index 6b6f905e63de3..c7ac99b14bd5a 100644
--- a/libc/src/math/generic/fminimum_numf.cpp
+++ b/libc/src/math/generic/fminimum_numf.cpp
@@ -1,4 +1,4 @@
-//===-- Implementation of fminimum_numf function --------------------------===//
+//===-- Implementation of fminimum_numf function---------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -14,11 +14,7 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float, fminimum_numf, (float x, float y)) {
-#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
-  return __builtin_fminf(x, y);
-#else
   return fputil::fminimum_num(x, y);
-#endif
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fminimum_numf16.cpp b/libc/src/math/generic/fminimum_numf16.cpp
index 8e48aaf27070f..0af7205713c10 100644
--- a/libc/src/math/generic/fminimum_numf16.cpp
+++ b/libc/src/math/generic/fminimum_numf16.cpp
@@ -14,11 +14,7 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float16, fminimum_numf16, (float16 x, float16 y)) {
-#ifdef __LIBC_USE_BUILTIN_FMAX_FMIN
-  return __builtin_fminf16(x, y);
-#else
   return fputil::fminimum_num(x, y);
-#endif
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/src/math/performance_testing/CMakeLists.txt b/libc/test/src/math/performance_testing/CMakeLists.txt
index a4059c8ff4dd8..65374f5bbbe19 100644
--- a/libc/test/src/math/performance_testing/CMakeLists.txt
+++ b/libc/test/src/math/performance_testing/CMakeLists.txt
@@ -414,10 +414,6 @@ add_perf_binary(
     libc.src.math.copysignf16
     libc.src.math.fabsf
     libc.src.math.fabsf16
-    libc.src.math.fmaximum_numf
-    libc.src.math.fmaximum_numf16
-    libc.src.math.fminimum_numf
-    libc.src.math.fminimum_numf16
     libc.src.math.frexpf16
   COMPILE_OPTIONS
     -fno-builtin
diff --git a/libc/test/src/math/performance_testing/misc_basic_ops_perf.cpp b/libc/test/src/math/performance_testing/misc_basic_ops_perf.cpp
index 6c3ee72a4c5d3..711e1c5455198 100644
--- a/libc/test/src/math/performance_testing/misc_basic_ops_perf.cpp
+++ b/libc/test/src/math/performance_testing/misc_basic_ops_perf.cpp
@@ -13,10 +13,6 @@
 #include "src/math/copysignf16.h"
 #include "src/math/fabsf.h"
 #include "src/math/fabsf16.h"
-#include "src/math/fmaximum_numf.h"
-#include "src/math/fmaximum_numf16.h"
-#include "src/math/fminimum_numf.h"
-#include "src/math/fminimum_numf16.h"
 #include "src/math/frexpf16.h"
 #include "test/src/math/performance_testing/Timer.h"
 
@@ -129,12 +125,6 @@ int main() {
   BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, LIBC_NAMESPACE::copysignf16,
                                   placeholder_binaryf16, FLOAT16_ROUNDS,
                                   "copysignf16_perf.log")
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, LIBC_NAMESPACE::fmaximum_numf16,
-                                  placeholder_binaryf16, FLOAT16_ROUNDS,
-                                  "fmaximum_numf16_perf.log")
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, LIBC_NAMESPACE::fminimum_numf16,
-                                  placeholder_binaryf16, FLOAT16_ROUNDS,
-                                  "fminimum_numf16_perf.log")
   FREXP_PERF(float16, LIBC_NAMESPACE::frexpf16, placeholder_frexpf16,
              FLOAT16_ROUNDS, "frexpf16_perf.log")
 
@@ -142,12 +132,6 @@ int main() {
                                      FLOAT_ROUNDS, "fabsf_perf.log")
   BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, LIBC_NAMESPACE::copysignf, copysignf,
                                   FLOAT_ROUNDS, "copysignf_perf.log")
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, LIBC_NAMESPACE::fmaximum_numf,
-                                  placeholder_binaryf, FLOAT_ROUNDS,
-                                  "fmaximum_numf_perf.log")
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, LIBC_NAMESPACE::fminimum_numf,
-                                  placeholder_binaryf, FLOAT_ROUNDS,
-                                  "fminimum_numf_perf.log")
 
   return 0;
 }

>From 147b327276af8ae87580df20a2dab6f2999c9093 Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Thu, 18 Jul 2024 20:11:45 +0200
Subject: [PATCH 6/9] fixup! [libc][math] Optimize misc basic ops using
 builtins when available

Remove benchmark for frexpf16.
---
 .../math/performance_testing/CMakeLists.txt   |  1 -
 .../misc_basic_ops_perf.cpp                   | 96 -------------------
 2 files changed, 97 deletions(-)

diff --git a/libc/test/src/math/performance_testing/CMakeLists.txt b/libc/test/src/math/performance_testing/CMakeLists.txt
index 65374f5bbbe19..f2f100512dfa1 100644
--- a/libc/test/src/math/performance_testing/CMakeLists.txt
+++ b/libc/test/src/math/performance_testing/CMakeLists.txt
@@ -414,7 +414,6 @@ add_perf_binary(
     libc.src.math.copysignf16
     libc.src.math.fabsf
     libc.src.math.fabsf16
-    libc.src.math.frexpf16
   COMPILE_OPTIONS
     -fno-builtin
 )
diff --git a/libc/test/src/math/performance_testing/misc_basic_ops_perf.cpp b/libc/test/src/math/performance_testing/misc_basic_ops_perf.cpp
index 711e1c5455198..ace1d21c62c32 100644
--- a/libc/test/src/math/performance_testing/misc_basic_ops_perf.cpp
+++ b/libc/test/src/math/performance_testing/misc_basic_ops_perf.cpp
@@ -8,102 +8,13 @@
 
 #include "BinaryOpSingleOutputPerf.h"
 #include "SingleInputSingleOutputPerf.h"
-#include "src/__support/FPUtil/FPBits.h"
 #include "src/math/copysignf.h"
 #include "src/math/copysignf16.h"
 #include "src/math/fabsf.h"
 #include "src/math/fabsf16.h"
-#include "src/math/frexpf16.h"
-#include "test/src/math/performance_testing/Timer.h"
 
-#include <algorithm>
-#include <fstream>
 #include <math.h>
 
-namespace LIBC_NAMESPACE::testing {
-
-template <typename T> class FrexpPerf {
-  using FPBits = fputil::FPBits<T>;
-  using StorageType = typename FPBits::StorageType;
-
-public:
-  typedef T Func(T, int *);
-
-  static void run_perf_in_range(Func my_func, Func other_func,
-                                StorageType starting_bit,
-                                StorageType ending_bit, size_t rounds,
-                                std::ofstream &log) {
-    size_t n = 10'010'001;
-    if (sizeof(StorageType) <= sizeof(size_t))
-      n = std::min(n, static_cast<size_t>(ending_bit - starting_bit));
-
-    auto runner = [=](Func func) {
-      StorageType step = (ending_bit - starting_bit) / n;
-      if (step == 0)
-        step = 1;
-      [[maybe_unused]] volatile T result;
-      int result_exp;
-      for (size_t i = 0; i < rounds; i++) {
-        for (StorageType bits = starting_bit; bits < ending_bit; bits += step) {
-          T x = FPBits(bits).get_val();
-          result = func(x, &result_exp);
-        }
-      }
-    };
-
-    Timer timer;
-    timer.start();
-    runner(my_func);
-    timer.stop();
-
-    double my_average = static_cast<double>(timer.nanoseconds()) / n / rounds;
-    log << "-- My function --\n";
-    log << "     Total time      : " << timer.nanoseconds() << " ns \n";
-    log << "     Average runtime : " << my_average << " ns/op \n";
-    log << "     Ops per second  : "
-        << static_cast<uint64_t>(1'000'000'000.0 / my_average) << " op/s \n";
-
-    timer.start();
-    runner(other_func);
-    timer.stop();
-
-    double other_average =
-        static_cast<double>(timer.nanoseconds()) / n / rounds;
-    log << "-- Other function --\n";
-    log << "     Total time      : " << timer.nanoseconds() << " ns \n";
-    log << "     Average runtime : " << other_average << " ns/op \n";
-    log << "     Ops per second  : "
-        << static_cast<uint64_t>(1'000'000'000.0 / other_average) << " op/s \n";
-
-    log << "-- Average runtime ratio --\n";
-    log << "     Mine / Other's  : " << my_average / other_average << " \n";
-  }
-
-  static void run_perf(Func my_func, Func other_func, size_t rounds,
-                       const char *log_file) {
-    std::ofstream log(log_file);
-    log << " Performance tests with inputs in denormal range:\n";
-    run_perf_in_range(my_func, other_func, /* startingBit= */ StorageType(0),
-                      /* endingBit= */ FPBits::max_subnormal().uintval(),
-                      rounds, log);
-    log << "\n Performance tests with inputs in normal range:\n";
-    run_perf_in_range(my_func, other_func,
-                      /* startingBit= */ FPBits::min_normal().uintval(),
-                      /* endingBit= */ FPBits::max_normal().uintval(), rounds,
-                      log);
-  }
-};
-
-} // namespace LIBC_NAMESPACE::testing
-
-#define FREXP_PERF(T, my_func, other_func, rounds, filename)                   \
-  {                                                                            \
-    LIBC_NAMESPACE::testing::FrexpPerf<T>::run_perf(&my_func, &other_func,     \
-                                                    rounds, filename);         \
-    LIBC_NAMESPACE::testing::FrexpPerf<T>::run_perf(&my_func, &other_func,     \
-                                                    rounds, filename);         \
-  }
-
 static constexpr size_t FLOAT16_ROUNDS = 20'000;
 static constexpr size_t FLOAT_ROUNDS = 40;
 
@@ -112,11 +23,6 @@ static constexpr size_t FLOAT_ROUNDS = 40;
 // system libc, so we compare them against this placeholder function.
 float16 placeholder_unaryf16(float16 x) { return x; }
 float16 placeholder_binaryf16(float16 x, float16 y) { return x; }
-float16 placeholder_frexpf16(float16 x, int *exp) { return x; }
-
-// The system libc might not provide the f{max,min}imum_num* C23 math functions
-// either.
-float placeholder_binaryf(float x, float y) { return x; }
 
 int main() {
   SINGLE_INPUT_SINGLE_OUTPUT_PERF_EX(float16, LIBC_NAMESPACE::fabsf16,
@@ -125,8 +31,6 @@ int main() {
   BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, LIBC_NAMESPACE::copysignf16,
                                   placeholder_binaryf16, FLOAT16_ROUNDS,
                                   "copysignf16_perf.log")
-  FREXP_PERF(float16, LIBC_NAMESPACE::frexpf16, placeholder_frexpf16,
-             FLOAT16_ROUNDS, "frexpf16_perf.log")
 
   SINGLE_INPUT_SINGLE_OUTPUT_PERF_EX(float, LIBC_NAMESPACE::fabsf, fabsf,
                                      FLOAT_ROUNDS, "fabsf_perf.log")

>From 9b8565976a96bf69a43a5f04366f111ec55955ed Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Mon, 22 Jul 2024 15:19:17 +0200
Subject: [PATCH 7/9] fixup! [libc][math] Optimize misc basic ops using
 builtins when available

Disable __builtin_fabsf16 on GCC for x86.
---
 libc/src/math/generic/CMakeLists.txt | 2 ++
 libc/src/math/generic/fabsf16.cpp    | 7 ++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 568ee245fa74e..41b128c2705a9 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -439,6 +439,8 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.macros.properties.types
     libc.src.__support.FPUtil.basic_operations
+    libc.src.__support.macros.properties.architectures
+    libc.src.__support.macros.properties.compiler
   COMPILE_OPTIONS
     -O3
   FLAGS
diff --git a/libc/src/math/generic/fabsf16.cpp b/libc/src/math/generic/fabsf16.cpp
index 2f982517614c4..02e11330db718 100644
--- a/libc/src/math/generic/fabsf16.cpp
+++ b/libc/src/math/generic/fabsf16.cpp
@@ -10,11 +10,16 @@
 #include "src/__support/FPUtil/BasicOperations.h"
 #include "src/__support/common.h"
 #include "src/__support/macros/config.h"
+#include "src/__support/macros/properties/architectures.h"
+#include "src/__support/macros/properties/compiler.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float16, fabsf16, (float16 x)) {
-#ifdef __LIBC_MISC_MATH_BASIC_OPS_OPT
+  // For x86, GCC generates better code from the generic implementation.
+  // https://godbolt.org/z/K9orM4hTa
+#if defined(__LIBC_MISC_MATH_BASIC_OPS_OPT) &&                                 \
+    !(defined(LIBC_TARGET_ARCH_IS_X86) && defined(LIBC_COMPILER_IS_GCC))
   return __builtin_fabsf16(x);
 #else
   return fputil::abs(x);

>From 2189647f1fdf6b436da7139346c8675b9f6a8f0d Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Mon, 22 Jul 2024 17:26:32 +0200
Subject: [PATCH 8/9] [libc][math] Change fabs* functions' compile options from
 -O2 to -O3

---
 libc/src/math/generic/CMakeLists.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 41b128c2705a9..c5cce32793060 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -399,7 +399,7 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
-    -O2
+    -O3
   FLAGS
     MISC_MATH_BASIC_OPS_OPT
 )
@@ -413,7 +413,7 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
-    -O2
+    -O3
   FLAGS
     MISC_MATH_BASIC_OPS_OPT
 )
@@ -427,7 +427,7 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
-    -O2
+    -O3
 )
 
 add_entrypoint_object(

>From 0a0818efffd573d4509150dea5b1ecdbe68bcec5 Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Mon, 22 Jul 2024 17:29:41 +0200
Subject: [PATCH 9/9] [libc][math] Switch from <algorithm> to CPP/algorithm.h
 in perf tests

---
 .../src/math/performance_testing/BinaryOpSingleOutputPerf.h   | 4 ++--
 libc/test/src/math/performance_testing/CMakeLists.txt         | 4 ++++
 .../math/performance_testing/SingleInputSingleOutputPerf.h    | 4 ++--
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
index 1ab0afbc9cbe8..63d9768e21899 100644
--- a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
+++ b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
@@ -6,11 +6,11 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "src/__support/CPP/algorithm.h"
 #include "src/__support/FPUtil/FPBits.h"
 #include "src/__support/macros/config.h"
 #include "test/src/math/performance_testing/Timer.h"
 
-#include <algorithm>
 #include <cstddef>
 #include <fstream>
 
@@ -30,7 +30,7 @@ template <typename T> class BinaryOpSingleOutputPerf {
                                 StorageType startingBit, StorageType endingBit,
                                 size_t N, size_t rounds, std::ofstream &log) {
     if (sizeof(StorageType) <= sizeof(size_t))
-      N = std::min(N, static_cast<size_t>(endingBit - startingBit));
+      N = cpp::min(N, static_cast<size_t>(endingBit - startingBit));
 
     auto runner = [=](Func func) {
       [[maybe_unused]] volatile T result;
diff --git a/libc/test/src/math/performance_testing/CMakeLists.txt b/libc/test/src/math/performance_testing/CMakeLists.txt
index f2f100512dfa1..be55419c087df 100644
--- a/libc/test/src/math/performance_testing/CMakeLists.txt
+++ b/libc/test/src/math/performance_testing/CMakeLists.txt
@@ -95,6 +95,9 @@ add_header_library(
   single_input_single_output_diff
   HDRS
     SingleInputSingleOutputPerf.h
+  DEPENDS
+    libc.src.__support.CPP.algorithm
+    libc.src.__support.FPUtil.fp_bits
 )
 
 add_header_library(
@@ -102,6 +105,7 @@ add_header_library(
   HDRS
     BinaryOpSingleOutputPerf.h
   DEPENDS
+    libc.src.__support.CPP.algorithm
     libc.src.__support.FPUtil.fp_bits
 )
 
diff --git a/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h b/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h
index e0beb729cb9f5..efad1259d6bf1 100644
--- a/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h
+++ b/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h
@@ -6,11 +6,11 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "src/__support/CPP/algorithm.h"
 #include "src/__support/FPUtil/FPBits.h"
 #include "src/__support/macros/config.h"
 #include "test/src/math/performance_testing/Timer.h"
 
-#include <algorithm>
 #include <fstream>
 
 namespace LIBC_NAMESPACE_DECL {
@@ -30,7 +30,7 @@ template <typename T> class SingleInputSingleOutputPerf {
                              size_t rounds, std::ofstream &log) {
     size_t n = 10'010'001;
     if (sizeof(StorageType) <= sizeof(size_t))
-      n = std::min(n, static_cast<size_t>(endingBit - startingBit));
+      n = cpp::min(n, static_cast<size_t>(endingBit - startingBit));
 
     auto runner = [=](Func func) {
       StorageType step = (endingBit - startingBit) / n;