[libc-commits] [libc] [libc][math] Optimize maximum and minimum functions using builtins when available (PR #100002)

Mon Jul 22 16:18:52 PDT 2024

https://github.com/overmighty created https://github.com/llvm/llvm-project/pull/100002

None

>From 441993ff267ce7192e3ca88e309fef425b483f0d Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Mon, 22 Jul 2024 20:11:49 +0200
Subject: [PATCH] [libc][math] Optimize maximum and minimum functions using
 builtins when available

---
 .../cmake/modules/CheckCompilerFeatures.cmake |   6 +
 .../modules/LLVMLibCCheckCpuFeatures.cmake    |   1 +
 .../modules/LLVMLibCCompileOptionRules.cmake  |  11 ++
 .../check_builtin_fmax_fmin.cpp               |   7 ++
 .../check_builtin_fmaxf16_fminf16.cpp         |   9 ++
 .../modules/cpu_features/check_FullFP16.cpp   |   5 +
 libc/src/__support/FPUtil/BasicOperations.h   | 116 ++++++++++++++----
 libc/src/__support/FPUtil/CMakeLists.txt      |   2 +
 .../macros/properties/cpu_features.h          |   4 +
 libc/src/math/generic/CMakeLists.txt          |  72 ++++++++---
 .../math/performance_testing/CMakeLists.txt   |  22 ++++
 .../max_min_funcs_perf.cpp                    |  75 +++++++++++
 12 files changed, 288 insertions(+), 42 deletions(-)
 create mode 100644 libc/cmake/modules/compiler_features/check_builtin_fmax_fmin.cpp
 create mode 100644 libc/cmake/modules/compiler_features/check_builtin_fmaxf16_fminf16.cpp
 create mode 100644 libc/cmake/modules/cpu_features/check_FullFP16.cpp
 create mode 100644 libc/test/src/math/performance_testing/max_min_funcs_perf.cpp

diff --git a/libc/cmake/modules/CheckCompilerFeatures.cmake b/libc/cmake/modules/CheckCompilerFeatures.cmake
index a6d793d495c45..8b02c590f11d0 100644
--- a/libc/cmake/modules/CheckCompilerFeatures.cmake
+++ b/libc/cmake/modules/CheckCompilerFeatures.cmake
@@ -5,6 +5,8 @@
 set(
   ALL_COMPILER_FEATURES
     "builtin_ceil_floor_rint_trunc"
+    "builtin_fmax_fmin"
+    "builtin_fmaxf16_fminf16"
     "builtin_round"
     "builtin_roundeven"
     "float16"
@@ -82,6 +84,10 @@ foreach(feature IN LISTS ALL_COMPILER_FEATURES)
       set(LIBC_COMPILER_HAS_FIXED_POINT TRUE)
     elseif(${feature} STREQUAL "builtin_ceil_floor_rint_trunc")
       set(LIBC_COMPILER_HAS_BUILTIN_CEIL_FLOOR_RINT_TRUNC TRUE)
+    elseif(${feature} STREQUAL "builtin_fmax_fmin")
+      set(LIBC_COMPILER_HAS_BUILTIN_FMAX_FMIN TRUE)
+    elseif(${feature} STREQUAL "builtin_fmaxf16_fminf16")
+      set(LIBC_COMPILER_HAS_BUILTIN_FMAXF16_FMINF16 TRUE)
     elseif(${feature} STREQUAL "builtin_round")
       set(LIBC_COMPILER_HAS_BUILTIN_ROUND TRUE)
     elseif(${feature} STREQUAL "builtin_roundeven")
diff --git a/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake b/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake
index 73b249374a066..be569f6f9cabf 100644
--- a/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake
+++ b/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake
@@ -9,6 +9,7 @@ if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
   set(ALL_CPU_FEATURES SSE2 SSE4_2 AVX AVX2 AVX512F AVX512BW FMA)
   set(LIBC_COMPILE_OPTIONS_NATIVE -march=native)
 elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64})
+  set(ALL_CPU_FEATURES "FullFP16")
   set(LIBC_COMPILE_OPTIONS_NATIVE -mcpu=native)
 endif()
 
diff --git a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
index 97d1c7262d24d..0f1ef6a575277 100644
--- a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
+++ b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
@@ -40,6 +40,17 @@ function(_get_compile_options_from_flags output_var)
     endif()
     if(ADD_MISC_MATH_BASIC_OPS_OPT_FLAG)
       list(APPEND compile_options "-D__LIBC_MISC_MATH_BASIC_OPS_OPT")
+      if(LIBC_COMPILER_HAS_BUILTIN_FMAX_FMIN)
+        list(APPEND compile_options "-D__LIBC_USE_BUILTIN_FMAX_FMIN")
+      endif()
+      if(LIBC_COMPILER_HAS_BUILTIN_FMAXF16_FMINF16)
+        list(APPEND compile_options "-D__LIBC_USE_BUILTIN_FMAXF16_FMINF16")
+      endif()
+      if("FullFP16" IN_LIST LIBC_CPU_FEATURES AND
+         CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+        list(APPEND compile_options
+             "SHELL:-Xclang -target-feature -Xclang +fullfp16")
+      endif()
     endif()
   elseif(MSVC)
     if(ADD_FMA_FLAG)
diff --git a/libc/cmake/modules/compiler_features/check_builtin_fmax_fmin.cpp b/libc/cmake/modules/compiler_features/check_builtin_fmax_fmin.cpp
new file mode 100644
index 0000000000000..594d839f174bc
--- /dev/null
+++ b/libc/cmake/modules/compiler_features/check_builtin_fmax_fmin.cpp
@@ -0,0 +1,7 @@
+float try_builtin_fmaxf(float x, float y) { return __builtin_fmaxf(x, y); }
+float try_builtin_fminf(float x, float y) { return __builtin_fminf(x, y); }
+
+double try_builtin_fmax(double x, double y) { return __builtin_fmax(x, y); }
+double try_builtin_fmin(double x, double y) { return __builtin_fmin(x, y); }
+
+extern "C" void _start() {}
diff --git a/libc/cmake/modules/compiler_features/check_builtin_fmaxf16_fminf16.cpp b/libc/cmake/modules/compiler_features/check_builtin_fmaxf16_fminf16.cpp
new file mode 100644
index 0000000000000..4ce377782800c
--- /dev/null
+++ b/libc/cmake/modules/compiler_features/check_builtin_fmaxf16_fminf16.cpp
@@ -0,0 +1,9 @@
+_Float16 try_builtin_fmaxf16(_Float16 x, _Float16 y) {
+  return __builtin_fmaxf16(x, y);
+}
+
+_Float16 try_builtin_fminf16(_Float16 x, _Float16 y) {
+  return __builtin_fminf16(x, y);
+}
+
+extern "C" void _start() {}
diff --git a/libc/cmake/modules/cpu_features/check_FullFP16.cpp b/libc/cmake/modules/cpu_features/check_FullFP16.cpp
new file mode 100644
index 0000000000000..b757fccc86e4c
--- /dev/null
+++ b/libc/cmake/modules/cpu_features/check_FullFP16.cpp
@@ -0,0 +1,5 @@
+#include "src/__support/macros/properties/cpu_features.h"
+
+#ifndef LIBC_TARGET_CPU_HAS_FULLFP16
+#error unsupported
+#endif
diff --git a/libc/src/__support/FPUtil/BasicOperations.h b/libc/src/__support/FPUtil/BasicOperations.h
index 3b7d7a5c249ae..83c319e151c61 100644
--- a/libc/src/__support/FPUtil/BasicOperations.h
+++ b/libc/src/__support/FPUtil/BasicOperations.h
@@ -17,6 +17,8 @@
 #include "src/__support/common.h"
 #include "src/__support/macros/config.h"
 #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+#include "src/__support/macros/properties/architectures.h"
+#include "src/__support/macros/properties/types.h"
 #include "src/__support/uint128.h"
 
 namespace LIBC_NAMESPACE_DECL {
@@ -27,6 +29,90 @@ LIBC_INLINE T abs(T x) {
   return FPBits<T>(x).abs().get_val();
 }
 
+namespace internal {
+
+template <typename T>
+LIBC_INLINE cpp::enable_if_t<cpp::is_floating_point_v<T>, T> max(T x, T y) {
+  FPBits<T> x_bits(x);
+  FPBits<T> y_bits(y);
+
+  // To make sure that fmax(+0, -0) == +0 == fmax(-0, +0), whenever x and y
+  // have different signs and both are not NaNs, we return the number with
+  // positive sign.
+  if (x_bits.sign() != y_bits.sign())
+    return x_bits.is_pos() ? x : y;
+  return x > y ? x : y;
+}
+
+#if defined(__LIBC_USE_BUILTIN_FMAXF16_FMINF16)
+template <> LIBC_INLINE float16 max(float16 x, float16 y) {
+  return __builtin_fmaxf16(x, y);
+}
+#elif !defined(LIBC_TARGET_ARCH_IS_AARCH64)
+template <> LIBC_INLINE float16 max(float16 x, float16 y) {
+  FPBits<float16> x_bits(x);
+  FPBits<float16> y_bits(y);
+
+  // If x and y have different signs, then the one with the lesser uintval is
+  // positive (has the sign bit set to 0), and therefore the maximum.
+  if (x_bits.sign() != y_bits.sign())
+    return x_bits.uintval() < y_bits.uintval() ? x : y;
+  return ((x_bits.uintval() > y_bits.uintval()) == x_bits.is_pos()) ? x : y;
+}
+#endif
+
+#if defined(__LIBC_USE_BUILTIN_FMAX_FMIN) && !defined(LIBC_TARGET_ARCH_IS_X86)
+template <> LIBC_INLINE float max(float x, float y) {
+  return __builtin_fmaxf(x, y);
+}
+
+template <> LIBC_INLINE double max(double x, double y) {
+  return __builtin_fmax(x, y);
+}
+#endif
+
+template <typename T>
+LIBC_INLINE cpp::enable_if_t<cpp::is_floating_point_v<T>, T> min(T x, T y) {
+  FPBits<T> x_bits(x);
+  FPBits<T> y_bits(y);
+
+  // To make sure that fmin(+0, -0) == -0 == fmin(-0, +0), whenever x and y have
+  // different signs and both are not NaNs, we return the number with negative
+  // sign.
+  if (x_bits.sign() != y_bits.sign())
+    return x_bits.is_neg() ? x : y;
+  return x < y ? x : y;
+}
+
+#if defined(__LIBC_USE_BUILTIN_FMAXF16_FMINF16)
+template <> LIBC_INLINE float16 min(float16 x, float16 y) {
+  return __builtin_fminf16(x, y);
+}
+#elif !defined(LIBC_TARGET_ARCH_IS_AARCH64)
+template <> LIBC_INLINE float16 min(float16 x, float16 y) {
+  FPBits<float16> x_bits(x);
+  FPBits<float16> y_bits(y);
+
+  // If x and y have different signs, then the one with the greater uintval is
+  // negative (has the sign bit set to 1), and therefore the minimum.
+  if (x_bits.sign() != y_bits.sign())
+    return x_bits.uintval() > y_bits.uintval() ? x : y;
+  return ((x_bits.uintval() < y_bits.uintval()) == x_bits.is_pos()) ? x : y;
+}
+#endif
+
+#if defined(__LIBC_USE_BUILTIN_FMAX_FMIN) && !defined(LIBC_TARGET_ARCH_IS_X86)
+template <> LIBC_INLINE float min(float x, float y) {
+  return __builtin_fminf(x, y);
+}
+
+template <> LIBC_INLINE double min(double x, double y) {
+  return __builtin_fmin(x, y);
+}
+#endif
+
+} // namespace internal
+
 template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
 LIBC_INLINE T fmin(T x, T y) {
   const FPBits<T> bitx(x), bity(y);
@@ -35,12 +121,7 @@ LIBC_INLINE T fmin(T x, T y) {
     return y;
   if (bity.is_nan())
     return x;
-  if (bitx.sign() != bity.sign())
-    // To make sure that fmin(+0, -0) == -0 == fmin(-0, +0), whenever x and
-    // y has different signs and both are not NaNs, we return the number
-    // with negative sign.
-    return bitx.is_neg() ? x : y;
-  return x < y ? x : y;
+  return internal::min(x, y);
 }
 
 template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
@@ -51,12 +132,7 @@ LIBC_INLINE T fmax(T x, T y) {
     return y;
   if (bity.is_nan())
     return x;
-  if (bitx.sign() != bity.sign())
-    // To make sure that fmax(+0, -0) == +0 == fmax(-0, +0), whenever x and
-    // y has different signs and both are not NaNs, we return the number
-    // with positive sign.
-    return bitx.is_neg() ? y : x;
-  return x > y ? x : y;
+  return internal::max(x, y);
 }
 
 template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
@@ -67,9 +143,7 @@ LIBC_INLINE T fmaximum(T x, T y) {
     return x;
   if (bity.is_nan())
     return y;
-  if (bitx.sign() != bity.sign())
-    return (bitx.is_neg() ? y : x);
-  return x > y ? x : y;
+  return internal::max(x, y);
 }
 
 template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
@@ -80,9 +154,7 @@ LIBC_INLINE T fminimum(T x, T y) {
     return x;
   if (bity.is_nan())
     return y;
-  if (bitx.sign() != bity.sign())
-    return (bitx.is_neg()) ? x : y;
-  return x < y ? x : y;
+  return internal::min(x, y);
 }
 
 template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
@@ -97,9 +169,7 @@ LIBC_INLINE T fmaximum_num(T x, T y) {
     return y;
   if (bity.is_nan())
     return x;
-  if (bitx.sign() != bity.sign())
-    return (bitx.is_neg() ? y : x);
-  return x > y ? x : y;
+  return internal::max(x, y);
 }
 
 template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
@@ -114,9 +184,7 @@ LIBC_INLINE T fminimum_num(T x, T y) {
     return y;
   if (bity.is_nan())
     return x;
-  if (bitx.sign() != bity.sign())
-    return (bitx.is_neg() ? x : y);
-  return x < y ? x : y;
+  return internal::min(x, y);
 }
 
 template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
diff --git a/libc/src/__support/FPUtil/CMakeLists.txt b/libc/src/__support/FPUtil/CMakeLists.txt
index 8804f3a4d5e23..bfdfffb85c255 100644
--- a/libc/src/__support/FPUtil/CMakeLists.txt
+++ b/libc/src/__support/FPUtil/CMakeLists.txt
@@ -192,6 +192,8 @@ add_header_library(
     libc.src.__support.uint128
     libc.src.__support.common
     libc.src.__support.macros.optimization
+    libc.src.__support.macros.properties.architectures
+    libc.src.__support.macros.properties.types
 )
 
 add_header_library(
diff --git a/libc/src/__support/macros/properties/cpu_features.h b/libc/src/__support/macros/properties/cpu_features.h
index 80d48be702070..8d431f203335f 100644
--- a/libc/src/__support/macros/properties/cpu_features.h
+++ b/libc/src/__support/macros/properties/cpu_features.h
@@ -14,6 +14,10 @@
 
 #include "architectures.h"
 
+#if defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC)
+#define LIBC_TARGET_CPU_HAS_FULLFP16
+#endif
+
 #if defined(__SSE2__)
 #define LIBC_TARGET_CPU_HAS_SSE2
 #endif
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 74360edff3f9a..ef96d6c4c2f37 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -2065,7 +2065,9 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
-    -O2
+    -O3
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -2077,7 +2079,9 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
-    -O2
+    -O3
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -2089,7 +2093,7 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
-    -O2
+    -O3
 )
 
 add_entrypoint_object(
@@ -2116,6 +2120,8 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
     -O3
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 
@@ -2128,7 +2134,9 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
-    -O2
+    -O3
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -2140,7 +2148,9 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
-    -O2
+    -O3
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -2152,7 +2162,7 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
-    -O2
+    -O3
 )
 
 add_entrypoint_object(
@@ -2179,6 +2189,8 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
     -O3
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -2190,7 +2202,9 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
-    -O2
+    -O3
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -2202,7 +2216,9 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
-    -O2
+    -O3
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -2214,7 +2230,7 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
-    -O2
+    -O3
 )
 
 add_entrypoint_object(
@@ -2228,6 +2244,8 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
     -O3
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -2252,7 +2270,9 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
-    -O2
+    -O3
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -2264,7 +2284,9 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
-    -O2
+    -O3
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -2276,7 +2298,7 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
-    -O2
+    -O3
 )
 
 add_entrypoint_object(
@@ -2290,6 +2312,8 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
     -O3
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -2438,7 +2462,9 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
-    -O2
+    -O3
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -2450,7 +2476,9 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
-    -O2
+    -O3
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -2462,7 +2490,7 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
-    -O2
+    -O3
 )
 
 add_entrypoint_object(
@@ -2476,6 +2504,8 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
     -O3
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -2500,7 +2530,9 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
-    -O2
+    -O3
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -2512,7 +2544,9 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
-    -O2
+    -O3
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
@@ -2524,7 +2558,7 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
-    -O2
+    -O3
 )
 
 add_entrypoint_object(
@@ -2538,6 +2572,8 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.basic_operations
   COMPILE_OPTIONS
     -O3
+  FLAGS
+    MISC_MATH_BASIC_OPS_OPT
 )
 
 add_entrypoint_object(
diff --git a/libc/test/src/math/performance_testing/CMakeLists.txt b/libc/test/src/math/performance_testing/CMakeLists.txt
index 72e1a730ac756..536338bf2a56a 100644
--- a/libc/test/src/math/performance_testing/CMakeLists.txt
+++ b/libc/test/src/math/performance_testing/CMakeLists.txt
@@ -421,3 +421,25 @@ add_perf_binary(
   COMPILE_OPTIONS
     -fno-builtin
 )
+
+add_perf_binary(
+  max_min_funcs_perf
+  SRCS
+    max_min_funcs_perf.cpp
+  DEPENDS
+    .binary_op_single_output_diff
+    libc.src.math.fmaxf
+    libc.src.math.fmaxf16
+    libc.src.math.fmaximumf
+    libc.src.math.fmaximumf16
+    libc.src.math.fmaximum_numf
+    libc.src.math.fmaximum_numf16
+    libc.src.math.fminf
+    libc.src.math.fminf16
+    libc.src.math.fminimumf
+    libc.src.math.fminimumf16
+    libc.src.math.fminimum_numf
+    libc.src.math.fminimum_numf16
+  COMPILE_OPTIONS
+    -fno-builtin
+)
diff --git a/libc/test/src/math/performance_testing/max_min_funcs_perf.cpp b/libc/test/src/math/performance_testing/max_min_funcs_perf.cpp
new file mode 100644
index 0000000000000..9540112e69ea6
--- /dev/null
+++ b/libc/test/src/math/performance_testing/max_min_funcs_perf.cpp
@@ -0,0 +1,75 @@
+//===-- Performance test for maximum and minimum functions ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "BinaryOpSingleOutputPerf.h"
+#include "src/math/fmaxf.h"
+#include "src/math/fmaxf16.h"
+#include "src/math/fmaximum_numf.h"
+#include "src/math/fmaximum_numf16.h"
+#include "src/math/fmaximumf.h"
+#include "src/math/fmaximumf16.h"
+#include "src/math/fminf.h"
+#include "src/math/fminf16.h"
+#include "src/math/fminimum_numf.h"
+#include "src/math/fminimum_numf16.h"
+#include "src/math/fminimumf.h"
+#include "src/math/fminimumf16.h"
+
+#include <math.h>
+
+static constexpr size_t FLOAT16_ROUNDS = 20'000;
+static constexpr size_t FLOAT_ROUNDS = 40;
+
+// LLVM libc might be the only libc implementation with support for float16 math
+// functions currently. We can't compare our float16 functions against the
+// system libc, so we compare them against this placeholder function.
+float16 placeholder_binaryf16(float16 x, float16 y) { return x; }
+
+// The system libc might not provide the fmaximum* and fminimum* C23 math
+// functions either.
+float placeholder_binaryf(float x, float y) { return x; }
+
+int main() {
+  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, LIBC_NAMESPACE::fmaxf16,
+                                  placeholder_binaryf16, FLOAT16_ROUNDS,
+                                  "fmaxf16_perf.log")
+  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, LIBC_NAMESPACE::fminf16,
+                                  placeholder_binaryf16, FLOAT16_ROUNDS,
+                                  "fminf16_perf.log")
+  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, LIBC_NAMESPACE::fmaximumf16,
+                                  placeholder_binaryf16, FLOAT16_ROUNDS,
+                                  "fmaximumf16_perf.log")
+  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, LIBC_NAMESPACE::fminimumf16,
+                                  placeholder_binaryf16, FLOAT16_ROUNDS,
+                                  "fminimumf16_perf.log")
+  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, LIBC_NAMESPACE::fmaximum_numf16,
+                                  placeholder_binaryf16, FLOAT16_ROUNDS,
+                                  "fmaximum_numf16_perf.log")
+  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, LIBC_NAMESPACE::fminimum_numf16,
+                                  placeholder_binaryf16, FLOAT16_ROUNDS,
+                                  "fminimum_numf16_perf.log")
+
+  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, LIBC_NAMESPACE::fmaxf, ::fmaxf,
+                                  FLOAT_ROUNDS, "fmaxf_perf.log")
+  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, LIBC_NAMESPACE::fminf, ::fminf,
+                                  FLOAT_ROUNDS, "fminf_perf.log")
+  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, LIBC_NAMESPACE::fmaximumf,
+                                  placeholder_binaryf, FLOAT_ROUNDS,
+                                  "fmaximumf_perf.log")
+  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, LIBC_NAMESPACE::fminimumf,
+                                  placeholder_binaryf, FLOAT_ROUNDS,
+                                  "fminimumf_perf.log")
+  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, LIBC_NAMESPACE::fmaximum_numf,
+                                  placeholder_binaryf, FLOAT_ROUNDS,
+                                  "fmaximum_numf_perf.log")
+  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, LIBC_NAMESPACE::fminimum_numf,
+                                  placeholder_binaryf, FLOAT_ROUNDS,
+                                  "fminimum_numf_perf.log")
+
+  return 0;
+}