[libc-commits] [libc] [llvm] [libc][math] Refactor pow to Header Only. (PR #176529)

via libc-commits libc-commits at lists.llvm.org
Fri Jan 16 17:39:15 PST 2026


https://github.com/AnonMiraj created https://github.com/llvm/llvm-project/pull/176529

@bassiounix 

closes : #176516 

>From 99aef88611d074be801ae01dc875583c78410191 Mon Sep 17 00:00:00 2001
From: anonmiraj <nabilmalek48 at gmail.com>
Date: Sat, 17 Jan 2026 03:38:16 +0200
Subject: [PATCH] [libc][math] Refactor pow to Header Only.

---
 libc/shared/math.h                            |   1 +
 libc/shared/math/pow.h                        |  22 +
 libc/src/__support/math/CMakeLists.txt        |  19 +
 libc/src/__support/math/pow.h                 | 544 ++++++++++++++++++
 libc/src/math/generic/CMakeLists.txt          |  13 +-
 libc/src/math/generic/pow.cpp                 | 523 +----------------
 libc/test/shared/CMakeLists.txt               |   1 +
 libc/test/shared/shared_math_test.cpp         |   1 +
 .../llvm-project-overlay/libc/BUILD.bazel     |  26 +-
 9 files changed, 612 insertions(+), 538 deletions(-)
 create mode 100644 libc/shared/math/pow.h
 create mode 100644 libc/src/__support/math/pow.h

diff --git a/libc/shared/math.h b/libc/shared/math.h
index 017c94f8ad54a..5ebadfb530682 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -74,6 +74,7 @@
 #include "math/logbf.h"
 #include "math/logbf128.h"
 #include "math/logbf16.h"
+#include "math/pow.h"
 #include "math/rsqrtf.h"
 #include "math/rsqrtf16.h"
 #include "math/sin.h"
diff --git a/libc/shared/math/pow.h b/libc/shared/math/pow.h
new file mode 100644
index 0000000000000..a5a31d70ed0ca
--- /dev/null
+++ b/libc/shared/math/pow.h
@@ -0,0 +1,22 @@
+//===-- Shared pow function -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_POW_H
+#define LLVM_LIBC_SHARED_MATH_POW_H
+#include "shared/libc_common.h"
+#include "src/__support/math/pow.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::pow;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SHARED_MATH_POW_H
diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt
index cb03b18678145..05d0ee2c4561c 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -1139,6 +1139,25 @@ add_header_library(
     libc.src.__support.uint128
 )
 
+add_header_library(
+  pow
+  HDRS
+    pow.h
+  DEPENDS
+    libc.src.__support.CPP.bit
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.nearest_integer
+    libc.src.__support.FPUtil.polyeval
+    libc.src.__support.FPUtil.sqrt
+    libc.src.__support.FPUtil.triple_double
+    libc.src.__support.macros.optimization
+    libc.src.__support.math.common_constants
+    libc.src.__support.math.exp10f
+    libc.src.__support.math.exp2f
+)
+
 add_header_library(
   sin
   HDRS
diff --git a/libc/src/__support/math/pow.h b/libc/src/__support/math/pow.h
new file mode 100644
index 0000000000000..8e4507cf0242f
--- /dev/null
+++ b/libc/src/__support/math/pow.h
@@ -0,0 +1,544 @@
+//===-- Implementation header for pow ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_POW_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_POW_H
+
+#include "hdr/errno_macros.h"
+#include "hdr/fenv_macros.h"
+#include "src/__support/CPP/bit.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/double_double.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/FPUtil/sqrt.h" // Speedup for pow(x, 1/2) = sqrt(x)
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+#include "src/__support/math/common_constants.h" // Lookup tables EXP_M1 and EXP_M2.
+#include "src/__support/math/exp_constants.h" // Lookup tables EXP_M1 and EXP_M2.
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+using fputil::DoubleDouble;
+
+namespace {
+
+using namespace common_constants_internal;
+
+// Constants for log2(x) range reduction, generated by Sollya with:
+// > for i from 0 to 127 do {
+//     r = 2^-8 * ceil( 2^8 * (1 - 2^(-8)) / (1 + i*2^-7) );
+//     b = nearestint(log2(r) * 2^41) * 2^-41;
+//     c = round(log2(r) - b, D, RN);
+//     print("{", -c, ",", -b, "},");
+//   };
+// This is the same as -log2(RD[i]), with the least significant bits of the
+// high part set to be 2^-41, so that the sum of high parts + e_x is exact in
+// double precision.
+// We also replace the first and the last ones to be 0.
+constexpr DoubleDouble LOG2_R_DD[128] = {
+    {0.0, 0.0},
+    {-0x1.19b14945cf6bap-44, 0x1.72c7ba21p-7},
+    {-0x1.95539356f93dcp-43, 0x1.743ee862p-6},
+    {0x1.abe0a48f83604p-43, 0x1.184b8e4c5p-5},
+    {0x1.635577970e04p-43, 0x1.77394c9d9p-5},
+    {-0x1.401fbaaa67e3cp-45, 0x1.d6ebd1f2p-5},
+    {-0x1.5b1799ceaeb51p-43, 0x1.1bb32a6008p-4},
+    {0x1.7c407050799bfp-43, 0x1.4c560fe688p-4},
+    {0x1.da6339da288fcp-43, 0x1.7d60496cf8p-4},
+    {0x1.be4f6f22dbbadp-43, 0x1.960caf9ab8p-4},
+    {-0x1.c760bc9b188c4p-45, 0x1.c7b528b71p-4},
+    {0x1.164e932b2d51cp-44, 0x1.f9c95dc1dp-4},
+    {0x1.924ae921f7ecap-45, 0x1.097e38ce6p-3},
+    {-0x1.6d25a5b8a19b2p-44, 0x1.22dadc2ab4p-3},
+    {0x1.e50a1644ac794p-43, 0x1.3c6fb650ccp-3},
+    {0x1.f34baa74a7942p-43, 0x1.494f863b8cp-3},
+    {-0x1.8f7aac147fdc1p-46, 0x1.633a8bf438p-3},
+    {0x1.f84be19cb9578p-43, 0x1.7046031c78p-3},
+    {-0x1.66cccab240e9p-46, 0x1.8a8980abfcp-3},
+    {-0x1.3f7a55cd2af4cp-47, 0x1.97c1cb13c8p-3},
+    {0x1.3458cde69308cp-43, 0x1.b2602497d4p-3},
+    {-0x1.667f21fa8423fp-44, 0x1.bfc67a8p-3},
+    {0x1.d2fe4574e09b9p-47, 0x1.dac22d3e44p-3},
+    {0x1.367bde40c5e6dp-43, 0x1.e857d3d36p-3},
+    {0x1.d45da26510033p-46, 0x1.01d9bbcfa6p-2},
+    {-0x1.7204f55bbf90dp-44, 0x1.08bce0d96p-2},
+    {-0x1.d4f1b95e0ff45p-43, 0x1.169c05364p-2},
+    {0x1.c20d74c0211bfp-44, 0x1.1d982c9d52p-2},
+    {0x1.ad89a083e072ap-43, 0x1.249cd2b13cp-2},
+    {0x1.cd0cb4492f1bcp-43, 0x1.32bfee370ep-2},
+    {-0x1.2101a9685c779p-47, 0x1.39de8e155ap-2},
+    {0x1.9451cd394fe8dp-43, 0x1.4106017c3ep-2},
+    {0x1.661e393a16b95p-44, 0x1.4f6fbb2cecp-2},
+    {-0x1.c6d8d86531d56p-44, 0x1.56b22e6b58p-2},
+    {0x1.c1c885adb21d3p-43, 0x1.5dfdcf1eeap-2},
+    {0x1.3bb5921006679p-45, 0x1.6552b49986p-2},
+    {0x1.1d406db502403p-43, 0x1.6cb0f6865cp-2},
+    {0x1.55a63e278bad5p-43, 0x1.7b89f02cf2p-2},
+    {-0x1.66ae2a7ada553p-49, 0x1.8304d90c12p-2},
+    {-0x1.66cccab240e9p-45, 0x1.8a8980abfcp-2},
+    {-0x1.62404772a151dp-45, 0x1.921800924ep-2},
+    {0x1.ac9bca36fd02ep-44, 0x1.99b072a96cp-2},
+    {0x1.4bc302ffa76fbp-43, 0x1.a8ff97181p-2},
+    {0x1.01fea1ec47c71p-43, 0x1.b0b67f4f46p-2},
+    {-0x1.f20203b3186a6p-43, 0x1.b877c57b1cp-2},
+    {-0x1.2642415d47384p-45, 0x1.c043859e3p-2},
+    {-0x1.bc76a2753b99bp-50, 0x1.c819dc2d46p-2},
+    {-0x1.da93ae3a5f451p-43, 0x1.cffae611aep-2},
+    {-0x1.50e785694a8c6p-43, 0x1.d7e6c0abc4p-2},
+    {0x1.c56138c894641p-43, 0x1.dfdd89d586p-2},
+    {0x1.5669df6a2b592p-43, 0x1.e7df5fe538p-2},
+    {-0x1.ea92d9e0e8ac2p-48, 0x1.efec61b012p-2},
+    {0x1.a0331af2e6feap-43, 0x1.f804ae8d0cp-2},
+    {0x1.9518ce032f41dp-48, 0x1.0014332bep-1},
+    {-0x1.b3b3864c60011p-44, 0x1.042bd4b9a8p-1},
+    {-0x1.103e8f00d41c8p-45, 0x1.08494c66b9p-1},
+    {0x1.65be75cc3da17p-43, 0x1.0c6caaf0c5p-1},
+    {0x1.3676289cd3dd4p-43, 0x1.1096015deep-1},
+    {-0x1.41dfc7d7c3321p-43, 0x1.14c560fe69p-1},
+    {0x1.e0cda8bd74461p-44, 0x1.18fadb6e2dp-1},
+    {0x1.2a606046ad444p-44, 0x1.1d368296b5p-1},
+    {0x1.f9ea977a639cp-43, 0x1.217868b0c3p-1},
+    {-0x1.50520a377c7ecp-45, 0x1.25c0a0463cp-1},
+    {0x1.6e3cb71b554e7p-47, 0x1.2a0f3c3407p-1},
+    {-0x1.4275f1035e5e8p-48, 0x1.2e644fac05p-1},
+    {-0x1.4275f1035e5e8p-48, 0x1.2e644fac05p-1},
+    {-0x1.979a5db68721dp-45, 0x1.32bfee370fp-1},
+    {0x1.1ee969a95f529p-43, 0x1.37222bb707p-1},
+    {0x1.bb4b69336b66ep-43, 0x1.3b8b1c68fap-1},
+    {0x1.d5e6a8a4fb059p-45, 0x1.3ffad4e74fp-1},
+    {0x1.3106e404cabb7p-44, 0x1.44716a2c08p-1},
+    {0x1.3106e404cabb7p-44, 0x1.44716a2c08p-1},
+    {-0x1.9bcaf1aa4168ap-43, 0x1.48eef19318p-1},
+    {0x1.1646b761c48dep-44, 0x1.4d7380dcc4p-1},
+    {0x1.2f0c0bfe9dbecp-43, 0x1.51ff2e3021p-1},
+    {0x1.29904613e33cp-43, 0x1.5692101d9bp-1},
+    {0x1.1d406db502403p-44, 0x1.5b2c3da197p-1},
+    {0x1.1d406db502403p-44, 0x1.5b2c3da197p-1},
+    {-0x1.125d6cbcd1095p-44, 0x1.5fcdce2728p-1},
+    {-0x1.bd9b32266d92cp-43, 0x1.6476d98adap-1},
+    {0x1.54243b21709cep-44, 0x1.6927781d93p-1},
+    {0x1.54243b21709cep-44, 0x1.6927781d93p-1},
+    {-0x1.ce60916e52e91p-44, 0x1.6ddfc2a79p-1},
+    {0x1.f1f5ae718f241p-43, 0x1.729fd26b7p-1},
+    {-0x1.6eb9612e0b4f3p-43, 0x1.7767c12968p-1},
+    {-0x1.6eb9612e0b4f3p-43, 0x1.7767c12968p-1},
+    {0x1.fed21f9cb2cc5p-43, 0x1.7c37a9227ep-1},
+    {0x1.7f5dc57266758p-43, 0x1.810fa51bf6p-1},
+    {0x1.7f5dc57266758p-43, 0x1.810fa51bf6p-1},
+    {0x1.5b338360c2ae2p-43, 0x1.85efd062c6p-1},
+    {-0x1.96fc8f4b56502p-43, 0x1.8ad846cf37p-1},
+    {-0x1.96fc8f4b56502p-43, 0x1.8ad846cf37p-1},
+    {-0x1.bdc81c4db3134p-44, 0x1.8fc924c89bp-1},
+    {0x1.36c101ee1344p-43, 0x1.94c287492cp-1},
+    {0x1.36c101ee1344p-43, 0x1.94c287492cp-1},
+    {0x1.e41fa0a62e6aep-44, 0x1.99c48be206p-1},
+    {-0x1.d97ee9124773bp-46, 0x1.9ecf50bf44p-1},
+    {-0x1.d97ee9124773bp-46, 0x1.9ecf50bf44p-1},
+    {-0x1.3f94e00e7d6bcp-46, 0x1.a3e2f4ac44p-1},
+    {-0x1.6879fa00b120ap-43, 0x1.a8ff971811p-1},
+    {-0x1.6879fa00b120ap-43, 0x1.a8ff971811p-1},
+    {0x1.1659d8e2d7d38p-44, 0x1.ae255819fp-1},
+    {0x1.1e5e0ae0d3f8ap-43, 0x1.b35458761dp-1},
+    {0x1.1e5e0ae0d3f8ap-43, 0x1.b35458761dp-1},
+    {0x1.484a15babcf88p-43, 0x1.b88cb9a2abp-1},
+    {0x1.484a15babcf88p-43, 0x1.b88cb9a2abp-1},
+    {0x1.871a7610e40bdp-45, 0x1.bdce9dcc96p-1},
+    {-0x1.2d90e5edaeceep-43, 0x1.c31a27dd01p-1},
+    {-0x1.2d90e5edaeceep-43, 0x1.c31a27dd01p-1},
+    {-0x1.5dd31d962d373p-43, 0x1.c86f7b7ea5p-1},
+    {-0x1.5dd31d962d373p-43, 0x1.c86f7b7ea5p-1},
+    {-0x1.9ad57391924a7p-43, 0x1.cdcebd2374p-1},
+    {-0x1.3167ccc538261p-44, 0x1.d338120a6ep-1},
+    {-0x1.3167ccc538261p-44, 0x1.d338120a6ep-1},
+    {0x1.c7a4ff65ddbc9p-45, 0x1.d8aba045bp-1},
+    {0x1.c7a4ff65ddbc9p-45, 0x1.d8aba045bp-1},
+    {-0x1.f9ab3cf74babap-44, 0x1.de298ec0bbp-1},
+    {-0x1.f9ab3cf74babap-44, 0x1.de298ec0bbp-1},
+    {0x1.52842c1c1e586p-43, 0x1.e3b20546f5p-1},
+    {0x1.52842c1c1e586p-43, 0x1.e3b20546f5p-1},
+    {0x1.3c6764fc87b4ap-48, 0x1.e9452c8a71p-1},
+    {0x1.3c6764fc87b4ap-48, 0x1.e9452c8a71p-1},
+    {-0x1.a0976c0a2827dp-44, 0x1.eee32e2aedp-1},
+    {-0x1.a0976c0a2827dp-44, 0x1.eee32e2aedp-1},
+    {-0x1.a45314dc4fc42p-43, 0x1.f48c34bd1fp-1},
+    {-0x1.a45314dc4fc42p-43, 0x1.f48c34bd1fp-1},
+    {0x1.ef5d00e390ap-44, 0x1.fa406bd244p-1},
+    {0.0, 1.0},
+};
+
+LIBC_INLINE static constexpr bool is_odd_integer(double x) {
+  using FPBits = fputil::FPBits<double>;
+  FPBits xbits(x);
+  uint64_t x_u = xbits.uintval();
+  unsigned x_e = static_cast<unsigned>(xbits.get_biased_exponent());
+  unsigned lsb =
+      static_cast<unsigned>(cpp::countr_zero(x_u | FPBits::EXP_MASK));
+  constexpr unsigned UNIT_EXPONENT =
+      static_cast<unsigned>(FPBits::EXP_BIAS + FPBits::FRACTION_LEN);
+  return (x_e + lsb == UNIT_EXPONENT);
+}
+
+LIBC_INLINE static constexpr bool is_integer(double x) {
+  using FPBits = fputil::FPBits<double>;
+  FPBits xbits(x);
+  uint64_t x_u = xbits.uintval();
+  unsigned x_e = static_cast<unsigned>(xbits.get_biased_exponent());
+  unsigned lsb =
+      static_cast<unsigned>(cpp::countr_zero(x_u | FPBits::EXP_MASK));
+  constexpr unsigned UNIT_EXPONENT =
+      static_cast<unsigned>(FPBits::EXP_BIAS + FPBits::FRACTION_LEN);
+  return (x_e + lsb >= UNIT_EXPONENT);
+}
+
+} // namespace
+
+LIBC_INLINE static constexpr double pow(double x, double y) {
+  using FPBits = fputil::FPBits<double>;
+
+  FPBits xbits(x), ybits(y);
+
+  bool x_sign = xbits.sign() == Sign::NEG;
+  bool y_sign = ybits.sign() == Sign::NEG;
+
+  FPBits x_abs = xbits.abs();
+  FPBits y_abs = ybits.abs();
+
+  uint64_t x_mant = xbits.get_mantissa();
+  uint64_t y_mant = ybits.get_mantissa();
+  uint64_t x_u = xbits.uintval();
+  uint64_t x_a = x_abs.uintval();
+  uint64_t y_a = y_abs.uintval();
+
+  double e_x = static_cast<double>(xbits.get_exponent());
+  uint64_t sign = 0;
+
+  ///////// BEGIN - Check exceptional cases ////////////////////////////////////
+  // If x or y is signaling NaN
+  if (x_abs.is_signaling_nan() || y_abs.is_signaling_nan()) {
+    fputil::raise_except_if_required(FE_INVALID);
+    return FPBits::quiet_nan().get_val();
+  }
+
+  // The double precision number that is closest to 1 is (1 - 2^-53), which has
+  //   log2(1 - 2^-53) ~ -1.715...p-53.
+  // So if |y| > |1075 / log2(1 - 2^-53)|, and x is finite:
+  //   |y * log2(x)| = 0 or > 1075.
+  // Hence x^y will either overflow or underflow if x is not zero.
+  if (LIBC_UNLIKELY(y_mant == 0 || y_a > 0x43d7'4910'd52d'3052 ||
+                    x_u == FPBits::one().uintval() ||
+                    x_u >= FPBits::inf().uintval() ||
+                    x_u < FPBits::min_normal().uintval())) {
+    // Exceptional exponents.
+    if (y == 0.0)
+      return 1.0;
+
+    switch (y_a) {
+    case 0x3fe0'0000'0000'0000: { // y = +-0.5
+      // TODO: speed up x^(-1/2) with rsqrt(x) when available.
+      if (LIBC_UNLIKELY(
+              (x == 0.0 || x_u == FPBits::inf(Sign::NEG).uintval()))) {
+        // pow(-0, 1/2) = +0
+        // pow(-inf, 1/2) = +inf
+        // Make sure it works correctly for FTZ/DAZ.
+        return y_sign ? 1.0 / (x * x) : (x * x);
+      }
+      return y_sign ? (1.0 / fputil::sqrt<double>(x)) : fputil::sqrt<double>(x);
+    }
+    case 0x3ff0'0000'0000'0000: // y = +-1.0
+      return y_sign ? (1.0 / x) : x;
+    case 0x4000'0000'0000'0000: // y = +-2.0;
+      return y_sign ? (1.0 / (x * x)) : (x * x);
+    }
+
+    // |y| > |1075 / log2(1 - 2^-53)|.
+    if (y_a > 0x43d7'4910'd52d'3052) {
+      if (y_a >= 0x7ff0'0000'0000'0000) {
+        // y is inf or nan
+        if (y_mant != 0) {
+          // y is NaN
+          // pow(1, NaN) = 1
+          // pow(x, NaN) = NaN
+          return (x_u == FPBits::one().uintval()) ? 1.0 : y;
+        }
+
+        // Now y is +-Inf
+        if (x_abs.is_nan()) {
+          // pow(NaN, +-Inf) = NaN
+          return x;
+        }
+
+        if (x_a == 0x3ff0'0000'0000'0000) {
+          // pow(+-1, +-Inf) = 1.0
+          return 1.0;
+        }
+
+        if (x == 0.0 && y_sign) {
+          // pow(+-0, -Inf) = +inf and raise FE_DIVBYZERO
+          fputil::set_errno_if_required(EDOM);
+          fputil::raise_except_if_required(FE_DIVBYZERO);
+          return FPBits::inf().get_val();
+        }
+        // pow (|x| < 1, -inf) = +inf
+        // pow (|x| < 1, +inf) = 0.0
+        // pow (|x| > 1, -inf) = 0.0
+        // pow (|x| > 1, +inf) = +inf
+        return ((x_a < FPBits::one().uintval()) == y_sign)
+                   ? FPBits::inf().get_val()
+                   : 0.0;
+      }
+      // x^y will overflow / underflow in double precision.  Set y to a
+      // large enough exponent but not too large, so that the computations
+      // won't overflow in double precision.
+      y = y_sign ? -0x1.0p100 : 0x1.0p100;
+    }
+
+    // y is finite and non-zero.
+
+    if (x_u == FPBits::one().uintval()) {
+      // pow(1, y) = 1
+      return 1.0;
+    }
+
+    // TODO: Speed things up with pow(2, y) = exp2(y) and pow(10, y) = exp10(y).
+
+    if (x == 0.0) {
+      bool out_is_neg = x_sign && is_odd_integer(y);
+      if (y_sign) {
+        // pow(0, negative number) = inf
+        fputil::set_errno_if_required(EDOM);
+        fputil::raise_except_if_required(FE_DIVBYZERO);
+        return FPBits::inf(out_is_neg ? Sign::NEG : Sign::POS).get_val();
+      }
+      // pow(0, positive number) = 0
+      return out_is_neg ? -0.0 : 0.0;
+    }
+
+    if (x_a == FPBits::inf().uintval()) {
+      bool out_is_neg = x_sign && is_odd_integer(y);
+      if (y_sign)
+        return out_is_neg ? -0.0 : 0.0;
+      return FPBits::inf(out_is_neg ? Sign::NEG : Sign::POS).get_val();
+    }
+
+    if (x_a > FPBits::inf().uintval()) {
+      // x is NaN.
+      // pow (aNaN, 0) is already taken care above.
+      return x;
+    }
+
+    // Normalize denormal inputs.
+    if (x_a < FPBits::min_normal().uintval()) {
+      e_x -= 64.0;
+      x_mant = FPBits(x * 0x1.0p64).get_mantissa();
+    }
+
+    // x is finite and negative, and y is a finite integer.
+    if (x_sign) {
+      if (is_integer(y)) {
+        x = -x;
+        if (is_odd_integer(y))
+          // sign = -1.0;
+          sign = 0x8000'0000'0000'0000;
+      } else {
+        // pow( negative, non-integer ) = NaN
+        fputil::set_errno_if_required(EDOM);
+        fputil::raise_except_if_required(FE_INVALID);
+        return FPBits::quiet_nan().get_val();
+      }
+    }
+  }
+
+  ///////// END - Check exceptional cases //////////////////////////////////////
+
+  // x^y = 2^( y * log2(x) )
+  //     = 2^( y * ( e_x + log2(m_x) ) )
+  // First we compute log2(x) = e_x + log2(m_x)
+
+  // Extract exponent field of x.
+
+  // Use the highest 7 fractional bits of m_x as the index for look up tables.
+  unsigned idx_x = static_cast<unsigned>(x_mant >> (FPBits::FRACTION_LEN - 7));
+  // Add the hidden bit to the mantissa.
+  // 1 <= m_x < 2
+  FPBits m_x = FPBits(x_mant | 0x3ff0'0000'0000'0000);
+
+  // Reduced argument for log2(m_x):
+  //   dx = r * m_x - 1.
+  // The computation is exact, and -2^-8 <= dx < 2^-7.
+  // Then m_x = (1 + dx) / r, and
+  //   log2(m_x) = log2( (1 + dx) / r )
+  //             = log2(1 + dx) - log2(r).
+
+  // In order for the overall computations x^y = 2^(y * log2(x)) to have the
+  // relative errors < 2^-52 (1ULP), we will need to evaluate the exponent part
+  // y * log2(x) with absolute errors < 2^-52 (or better, 2^-53).  Since the
+  // whole exponent range for double precision is bounded by
+  // |y * log2(x)| < 1076 ~ 2^10, we need to evaluate log2(x) with absolute
+  // errors < 2^-53 * 2^-10 = 2^-63.
+
+  // With that requirement, we use the following degree-6 polynomial
+  // approximation:
+  //   P(dx) ~ log2(1 + dx) / dx
+  // Generated by Sollya with:
+  // > P = fpminimax(log2(1 + x)/x, 6, [|D...|], [-2^-8, 2^-7]); P;
+  // > dirtyinfnorm(log2(1 + x) - x*P, [-2^-8, 2^-7]);
+  //   0x1.d03cc...p-66
+  constexpr double COEFFS[] = {0x1.71547652b82fep0,  -0x1.71547652b82e7p-1,
+                               0x1.ec709dc3b1fd5p-2, -0x1.7154766124215p-2,
+                               0x1.2776bd90259d8p-2, -0x1.ec586c6f3d311p-3,
+                               0x1.9c4775eccf524p-3};
+  // Error: ulp(dx^2) <= (2^-7)^2 * 2^-52 = 2^-66
+  // Extra errors from various computations and rounding directions, the overall
+  // errors we can be bounded by 2^-65.
+
+  DoubleDouble dx_c0;
+
+  // Perform exact range reduction and exact product dx * c0.
+#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+  double dx = fputil::multiply_add(RD[idx_x], m_x.get_val(), -1.0); // Exact
+  dx_c0 = fputil::exact_mult(COEFFS[0], dx);
+#else
+  double c = FPBits(m_x.uintval() & 0x3fff'e000'0000'0000).get_val();
+  double dx =
+      fputil::multiply_add(RD[idx_x], m_x.get_val() - c, CD[idx_x]); // Exact
+  dx_c0 = fputil::exact_mult<double, 28>(dx, COEFFS[0]);             // Exact
+#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+
+  double dx2 = dx * dx;
+  double c0 = fputil::multiply_add(dx, COEFFS[2], COEFFS[1]);
+  double c1 = fputil::multiply_add(dx, COEFFS[4], COEFFS[3]);
+  double c2 = fputil::multiply_add(dx, COEFFS[6], COEFFS[5]);
+
+  double p = fputil::polyeval(dx2, c0, c1, c2);
+
+  // s = e_x - log2(r) + dx * P(dx)
+  // Absolute error bound:
+  //   |log2(x) - log2_x.hi - log2_x.lo| < 2^-65.
+
+  // Notice that e_x - log2(r).hi is exact, so we perform an exact sum of
+  // e_x - log2(r).hi and the high part of the product dx * c0:
+  //   log2_x_hi.hi + log2_x_hi.lo = e_x - log2(r).hi + (dx * c0).hi
+  DoubleDouble log2_x_hi =
+      fputil::exact_add(e_x + LOG2_R_DD[idx_x].hi, dx_c0.hi);
+  // The low part is dx^2 * p + low part of (dx * c0) + low part of -log2(r).
+  double log2_x_lo =
+      fputil::multiply_add(dx2, p, dx_c0.lo + LOG2_R_DD[idx_x].lo);
+  // Perform accurate sums.
+  DoubleDouble log2_x = fputil::exact_add(log2_x_hi.hi, log2_x_lo);
+  log2_x.lo += log2_x_hi.lo;
+
+  // To compute 2^(y * log2(x)), we break the exponent into 3 parts:
+  //   y * log(2) = hi + mid + lo, where
+  //   hi is an integer
+  //   mid * 2^6 is an integer
+  //   |lo| <= 2^-7
+  // Then:
+  //   x^y = 2^(y * log2(x)) = 2^hi * 2^mid * 2^lo,
+  // In which 2^mid is obtained from a look-up table of size 2^6 = 64 elements,
+  // and 2^lo ~ 1 + lo * P(lo).
+  // Thus, we have:
+  //   hi + mid = 2^-6 * round( 2^6 * y * log2(x) )
+  // If we restrict the output such that |hi| < 150, (hi + mid) uses (8 + 6)
+  // bits, hence, if we use double precision to perform
+  //   round( 2^6 * y * log2(x))
+  // the lo part is bounded by 2^-7 + 2^(-(52 - 14)) = 2^-7 + 2^-38
+
+  // In the following computations:
+  //   y6  = 2^6 * y
+  //   hm  = 2^6 * (hi + mid) = round(2^6 * y * log2(x)) ~ round(y6 * s)
+  //   lo6 = 2^6 * lo = 2^6 * (y - (hi + mid)) = y6 * log2(x) - hm.
+  double y6 = y * 0x1.0p6; // Exact.
+
+  DoubleDouble y6_log2_x = fputil::exact_mult(y6, log2_x.hi);
+  y6_log2_x.lo = fputil::multiply_add(y6, log2_x.lo, y6_log2_x.lo);
+
+  // Check overflow/underflow.
+  double scale = 1.0;
+
+  // |2^(hi + mid) - exp2_hi_mid| <= ulp(exp2_hi_mid) / 2
+  // Clamp the exponent part into smaller range that fits double precision.
+  // For those exponents that are out of range, the final conversion will round
+  // them correctly to inf/max float or 0/min float accordingly.
+  constexpr double UPPER_EXP_BOUND = 512.0 * 0x1.0p6;
+  if (LIBC_UNLIKELY(FPBits(y6_log2_x.hi).abs().get_val() >= UPPER_EXP_BOUND)) {
+    if (FPBits(y6_log2_x.hi).sign() == Sign::POS) {
+      scale = 0x1.0p512;
+      y6_log2_x.hi -= 512.0 * 64.0;
+      if (y6_log2_x.hi > 513.0 * 64.0)
+        y6_log2_x.hi = 513.0 * 64.0;
+    } else {
+      scale = 0x1.0p-512;
+      y6_log2_x.hi += 512.0 * 64.0;
+      if (y6_log2_x.hi < (-1076.0 + 512.0) * 64.0)
+        y6_log2_x.hi = -564.0 * 64.0;
+    }
+  }
+
+  double hm = fputil::nearest_integer(y6_log2_x.hi);
+
+  // lo6 = 2^6 * lo.
+  double lo6_hi = y6_log2_x.hi - hm;
+  double lo6 = lo6_hi + y6_log2_x.lo;
+
+  int hm_i = static_cast<int>(hm);
+  unsigned idx_y = static_cast<unsigned>(hm_i) & 0x3f;
+
+  // 2^hi
+  int64_t exp2_hi_i = static_cast<int64_t>(
+      static_cast<uint64_t>(static_cast<int64_t>(hm_i >> 6))
+      << FPBits::FRACTION_LEN);
+  // 2^mid
+  int64_t exp2_mid_hi_i =
+      static_cast<int64_t>(FPBits(EXP2_MID1[idx_y].hi).uintval());
+  int64_t exp2_mid_lo_i =
+      static_cast<int64_t>(FPBits(EXP2_MID1[idx_y].mid).uintval());
+  // (-1)^sign * 2^hi * 2^mid
+  // Error <= 2^hi * 2^-53
+  uint64_t exp2_hm_hi_i =
+      static_cast<uint64_t>(exp2_hi_i + exp2_mid_hi_i) + sign;
+  // The low part could be 0.
+  uint64_t exp2_hm_lo_i =
+      idx_y != 0 ? static_cast<uint64_t>(exp2_hi_i + exp2_mid_lo_i) + sign
+                 : sign;
+  double exp2_hm_hi = FPBits(exp2_hm_hi_i).get_val();
+  double exp2_hm_lo = FPBits(exp2_hm_lo_i).get_val();
+
+  // Degree-5 polynomial approximation P(lo6) ~ 2^(lo6 / 2^6) = 2^(lo).
+  // Generated by Sollya with:
+  // > P = fpminimax(2^(x/64), 5, [|1, D...|], [-2^-1, 2^-1]);
+  // > dirtyinfnorm(2^(x/64) - P, [-0.5, 0.5]);
+  // 0x1.a2b77e618f5c4c176fd11b7659016cde5de83cb72p-60
+  constexpr double EXP2_COEFFS[] = {0x1p0,
+                                    0x1.62e42fefa39efp-7,
+                                    0x1.ebfbdff82a23ap-15,
+                                    0x1.c6b08d7076268p-23,
+                                    0x1.3b2ad33f8b48bp-31,
+                                    0x1.5d870c4d84445p-40};
+
+  double lo6_sqr = lo6 * lo6;
+
+  double d0 = fputil::multiply_add(lo6, EXP2_COEFFS[2], EXP2_COEFFS[1]);
+  double d1 = fputil::multiply_add(lo6, EXP2_COEFFS[4], EXP2_COEFFS[3]);
+  double pp = fputil::polyeval(lo6_sqr, d0, d1, EXP2_COEFFS[5]);
+
+  double r = fputil::multiply_add(exp2_hm_hi * lo6, pp, exp2_hm_lo);
+  r += exp2_hm_hi;
+
+  return r * scale;
+}
+
+} // namespace math
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_POW_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index e4ad6fcafeaf0..e947496f68962 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -1581,18 +1581,7 @@ add_entrypoint_object(
   HDRS
     ../powf.h
   DEPENDS
-    libc.src.__support.math.exp10f
-    libc.src.__support.math.exp2f
-    libc.src.__support.CPP.bit
-    libc.src.__support.FPUtil.fenv_impl
-    libc.src.__support.FPUtil.fp_bits
-    libc.src.__support.FPUtil.multiply_add
-    libc.src.__support.FPUtil.nearest_integer
-    libc.src.__support.FPUtil.polyeval
-    libc.src.__support.FPUtil.sqrt
-    libc.src.__support.FPUtil.triple_double
-    libc.src.__support.macros.optimization
-    libc.src.__support.math.common_constants
+    libc.src.__support.math.pow
     libc.src.errno.errno
 )
 
diff --git a/libc/src/math/generic/pow.cpp b/libc/src/math/generic/pow.cpp
index c9f685b82fcb5..5fbd7f5441add 100644
--- a/libc/src/math/generic/pow.cpp
+++ b/libc/src/math/generic/pow.cpp
@@ -7,531 +7,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/math/pow.h"
-#include "hdr/errno_macros.h"
-#include "hdr/fenv_macros.h"
-#include "src/__support/CPP/bit.h"
-#include "src/__support/FPUtil/FEnvImpl.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/PolyEval.h"
-#include "src/__support/FPUtil/double_double.h"
-#include "src/__support/FPUtil/multiply_add.h"
-#include "src/__support/FPUtil/nearest_integer.h"
-#include "src/__support/FPUtil/sqrt.h" // Speedup for pow(x, 1/2) = sqrt(x)
-#include "src/__support/common.h"
-#include "src/__support/macros/config.h"
-#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
-#include "src/__support/math/common_constants.h" // Lookup tables EXP_M1 and EXP_M2.
-#include "src/__support/math/exp_constants.h" // Lookup tables EXP_M1 and EXP_M2.
+#include "src/__support/math/pow.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
-using fputil::DoubleDouble;
-
-namespace {
-
-using namespace common_constants_internal;
-
-// Constants for log2(x) range reduction, generated by Sollya with:
-// > for i from 0 to 127 do {
-//     r = 2^-8 * ceil( 2^8 * (1 - 2^(-8)) / (1 + i*2^-7) );
-//     b = nearestint(log2(r) * 2^41) * 2^-41;
-//     c = round(log2(r) - b, D, RN);
-//     print("{", -c, ",", -b, "},");
-//   };
-// This is the same as -log2(RD[i]), with the least significant bits of the
-// high part set to be 2^-41, so that the sum of high parts + e_x is exact in
-// double precision.
-// We also replace the first and the last ones to be 0.
-constexpr DoubleDouble LOG2_R_DD[128] = {
-    {0.0, 0.0},
-    {-0x1.19b14945cf6bap-44, 0x1.72c7ba21p-7},
-    {-0x1.95539356f93dcp-43, 0x1.743ee862p-6},
-    {0x1.abe0a48f83604p-43, 0x1.184b8e4c5p-5},
-    {0x1.635577970e04p-43, 0x1.77394c9d9p-5},
-    {-0x1.401fbaaa67e3cp-45, 0x1.d6ebd1f2p-5},
-    {-0x1.5b1799ceaeb51p-43, 0x1.1bb32a6008p-4},
-    {0x1.7c407050799bfp-43, 0x1.4c560fe688p-4},
-    {0x1.da6339da288fcp-43, 0x1.7d60496cf8p-4},
-    {0x1.be4f6f22dbbadp-43, 0x1.960caf9ab8p-4},
-    {-0x1.c760bc9b188c4p-45, 0x1.c7b528b71p-4},
-    {0x1.164e932b2d51cp-44, 0x1.f9c95dc1dp-4},
-    {0x1.924ae921f7ecap-45, 0x1.097e38ce6p-3},
-    {-0x1.6d25a5b8a19b2p-44, 0x1.22dadc2ab4p-3},
-    {0x1.e50a1644ac794p-43, 0x1.3c6fb650ccp-3},
-    {0x1.f34baa74a7942p-43, 0x1.494f863b8cp-3},
-    {-0x1.8f7aac147fdc1p-46, 0x1.633a8bf438p-3},
-    {0x1.f84be19cb9578p-43, 0x1.7046031c78p-3},
-    {-0x1.66cccab240e9p-46, 0x1.8a8980abfcp-3},
-    {-0x1.3f7a55cd2af4cp-47, 0x1.97c1cb13c8p-3},
-    {0x1.3458cde69308cp-43, 0x1.b2602497d4p-3},
-    {-0x1.667f21fa8423fp-44, 0x1.bfc67a8p-3},
-    {0x1.d2fe4574e09b9p-47, 0x1.dac22d3e44p-3},
-    {0x1.367bde40c5e6dp-43, 0x1.e857d3d36p-3},
-    {0x1.d45da26510033p-46, 0x1.01d9bbcfa6p-2},
-    {-0x1.7204f55bbf90dp-44, 0x1.08bce0d96p-2},
-    {-0x1.d4f1b95e0ff45p-43, 0x1.169c05364p-2},
-    {0x1.c20d74c0211bfp-44, 0x1.1d982c9d52p-2},
-    {0x1.ad89a083e072ap-43, 0x1.249cd2b13cp-2},
-    {0x1.cd0cb4492f1bcp-43, 0x1.32bfee370ep-2},
-    {-0x1.2101a9685c779p-47, 0x1.39de8e155ap-2},
-    {0x1.9451cd394fe8dp-43, 0x1.4106017c3ep-2},
-    {0x1.661e393a16b95p-44, 0x1.4f6fbb2cecp-2},
-    {-0x1.c6d8d86531d56p-44, 0x1.56b22e6b58p-2},
-    {0x1.c1c885adb21d3p-43, 0x1.5dfdcf1eeap-2},
-    {0x1.3bb5921006679p-45, 0x1.6552b49986p-2},
-    {0x1.1d406db502403p-43, 0x1.6cb0f6865cp-2},
-    {0x1.55a63e278bad5p-43, 0x1.7b89f02cf2p-2},
-    {-0x1.66ae2a7ada553p-49, 0x1.8304d90c12p-2},
-    {-0x1.66cccab240e9p-45, 0x1.8a8980abfcp-2},
-    {-0x1.62404772a151dp-45, 0x1.921800924ep-2},
-    {0x1.ac9bca36fd02ep-44, 0x1.99b072a96cp-2},
-    {0x1.4bc302ffa76fbp-43, 0x1.a8ff97181p-2},
-    {0x1.01fea1ec47c71p-43, 0x1.b0b67f4f46p-2},
-    {-0x1.f20203b3186a6p-43, 0x1.b877c57b1cp-2},
-    {-0x1.2642415d47384p-45, 0x1.c043859e3p-2},
-    {-0x1.bc76a2753b99bp-50, 0x1.c819dc2d46p-2},
-    {-0x1.da93ae3a5f451p-43, 0x1.cffae611aep-2},
-    {-0x1.50e785694a8c6p-43, 0x1.d7e6c0abc4p-2},
-    {0x1.c56138c894641p-43, 0x1.dfdd89d586p-2},
-    {0x1.5669df6a2b592p-43, 0x1.e7df5fe538p-2},
-    {-0x1.ea92d9e0e8ac2p-48, 0x1.efec61b012p-2},
-    {0x1.a0331af2e6feap-43, 0x1.f804ae8d0cp-2},
-    {0x1.9518ce032f41dp-48, 0x1.0014332bep-1},
-    {-0x1.b3b3864c60011p-44, 0x1.042bd4b9a8p-1},
-    {-0x1.103e8f00d41c8p-45, 0x1.08494c66b9p-1},
-    {0x1.65be75cc3da17p-43, 0x1.0c6caaf0c5p-1},
-    {0x1.3676289cd3dd4p-43, 0x1.1096015deep-1},
-    {-0x1.41dfc7d7c3321p-43, 0x1.14c560fe69p-1},
-    {0x1.e0cda8bd74461p-44, 0x1.18fadb6e2dp-1},
-    {0x1.2a606046ad444p-44, 0x1.1d368296b5p-1},
-    {0x1.f9ea977a639cp-43, 0x1.217868b0c3p-1},
-    {-0x1.50520a377c7ecp-45, 0x1.25c0a0463cp-1},
-    {0x1.6e3cb71b554e7p-47, 0x1.2a0f3c3407p-1},
-    {-0x1.4275f1035e5e8p-48, 0x1.2e644fac05p-1},
-    {-0x1.4275f1035e5e8p-48, 0x1.2e644fac05p-1},
-    {-0x1.979a5db68721dp-45, 0x1.32bfee370fp-1},
-    {0x1.1ee969a95f529p-43, 0x1.37222bb707p-1},
-    {0x1.bb4b69336b66ep-43, 0x1.3b8b1c68fap-1},
-    {0x1.d5e6a8a4fb059p-45, 0x1.3ffad4e74fp-1},
-    {0x1.3106e404cabb7p-44, 0x1.44716a2c08p-1},
-    {0x1.3106e404cabb7p-44, 0x1.44716a2c08p-1},
-    {-0x1.9bcaf1aa4168ap-43, 0x1.48eef19318p-1},
-    {0x1.1646b761c48dep-44, 0x1.4d7380dcc4p-1},
-    {0x1.2f0c0bfe9dbecp-43, 0x1.51ff2e3021p-1},
-    {0x1.29904613e33cp-43, 0x1.5692101d9bp-1},
-    {0x1.1d406db502403p-44, 0x1.5b2c3da197p-1},
-    {0x1.1d406db502403p-44, 0x1.5b2c3da197p-1},
-    {-0x1.125d6cbcd1095p-44, 0x1.5fcdce2728p-1},
-    {-0x1.bd9b32266d92cp-43, 0x1.6476d98adap-1},
-    {0x1.54243b21709cep-44, 0x1.6927781d93p-1},
-    {0x1.54243b21709cep-44, 0x1.6927781d93p-1},
-    {-0x1.ce60916e52e91p-44, 0x1.6ddfc2a79p-1},
-    {0x1.f1f5ae718f241p-43, 0x1.729fd26b7p-1},
-    {-0x1.6eb9612e0b4f3p-43, 0x1.7767c12968p-1},
-    {-0x1.6eb9612e0b4f3p-43, 0x1.7767c12968p-1},
-    {0x1.fed21f9cb2cc5p-43, 0x1.7c37a9227ep-1},
-    {0x1.7f5dc57266758p-43, 0x1.810fa51bf6p-1},
-    {0x1.7f5dc57266758p-43, 0x1.810fa51bf6p-1},
-    {0x1.5b338360c2ae2p-43, 0x1.85efd062c6p-1},
-    {-0x1.96fc8f4b56502p-43, 0x1.8ad846cf37p-1},
-    {-0x1.96fc8f4b56502p-43, 0x1.8ad846cf37p-1},
-    {-0x1.bdc81c4db3134p-44, 0x1.8fc924c89bp-1},
-    {0x1.36c101ee1344p-43, 0x1.94c287492cp-1},
-    {0x1.36c101ee1344p-43, 0x1.94c287492cp-1},
-    {0x1.e41fa0a62e6aep-44, 0x1.99c48be206p-1},
-    {-0x1.d97ee9124773bp-46, 0x1.9ecf50bf44p-1},
-    {-0x1.d97ee9124773bp-46, 0x1.9ecf50bf44p-1},
-    {-0x1.3f94e00e7d6bcp-46, 0x1.a3e2f4ac44p-1},
-    {-0x1.6879fa00b120ap-43, 0x1.a8ff971811p-1},
-    {-0x1.6879fa00b120ap-43, 0x1.a8ff971811p-1},
-    {0x1.1659d8e2d7d38p-44, 0x1.ae255819fp-1},
-    {0x1.1e5e0ae0d3f8ap-43, 0x1.b35458761dp-1},
-    {0x1.1e5e0ae0d3f8ap-43, 0x1.b35458761dp-1},
-    {0x1.484a15babcf88p-43, 0x1.b88cb9a2abp-1},
-    {0x1.484a15babcf88p-43, 0x1.b88cb9a2abp-1},
-    {0x1.871a7610e40bdp-45, 0x1.bdce9dcc96p-1},
-    {-0x1.2d90e5edaeceep-43, 0x1.c31a27dd01p-1},
-    {-0x1.2d90e5edaeceep-43, 0x1.c31a27dd01p-1},
-    {-0x1.5dd31d962d373p-43, 0x1.c86f7b7ea5p-1},
-    {-0x1.5dd31d962d373p-43, 0x1.c86f7b7ea5p-1},
-    {-0x1.9ad57391924a7p-43, 0x1.cdcebd2374p-1},
-    {-0x1.3167ccc538261p-44, 0x1.d338120a6ep-1},
-    {-0x1.3167ccc538261p-44, 0x1.d338120a6ep-1},
-    {0x1.c7a4ff65ddbc9p-45, 0x1.d8aba045bp-1},
-    {0x1.c7a4ff65ddbc9p-45, 0x1.d8aba045bp-1},
-    {-0x1.f9ab3cf74babap-44, 0x1.de298ec0bbp-1},
-    {-0x1.f9ab3cf74babap-44, 0x1.de298ec0bbp-1},
-    {0x1.52842c1c1e586p-43, 0x1.e3b20546f5p-1},
-    {0x1.52842c1c1e586p-43, 0x1.e3b20546f5p-1},
-    {0x1.3c6764fc87b4ap-48, 0x1.e9452c8a71p-1},
-    {0x1.3c6764fc87b4ap-48, 0x1.e9452c8a71p-1},
-    {-0x1.a0976c0a2827dp-44, 0x1.eee32e2aedp-1},
-    {-0x1.a0976c0a2827dp-44, 0x1.eee32e2aedp-1},
-    {-0x1.a45314dc4fc42p-43, 0x1.f48c34bd1fp-1},
-    {-0x1.a45314dc4fc42p-43, 0x1.f48c34bd1fp-1},
-    {0x1.ef5d00e390ap-44, 0x1.fa406bd244p-1},
-    {0.0, 1.0},
-};
-
-bool is_odd_integer(double x) {
-  using FPBits = fputil::FPBits<double>;
-  FPBits xbits(x);
-  uint64_t x_u = xbits.uintval();
-  unsigned x_e = static_cast<unsigned>(xbits.get_biased_exponent());
-  unsigned lsb =
-      static_cast<unsigned>(cpp::countr_zero(x_u | FPBits::EXP_MASK));
-  constexpr unsigned UNIT_EXPONENT =
-      static_cast<unsigned>(FPBits::EXP_BIAS + FPBits::FRACTION_LEN);
-  return (x_e + lsb == UNIT_EXPONENT);
-}
-
-bool is_integer(double x) {
-  using FPBits = fputil::FPBits<double>;
-  FPBits xbits(x);
-  uint64_t x_u = xbits.uintval();
-  unsigned x_e = static_cast<unsigned>(xbits.get_biased_exponent());
-  unsigned lsb =
-      static_cast<unsigned>(cpp::countr_zero(x_u | FPBits::EXP_MASK));
-  constexpr unsigned UNIT_EXPONENT =
-      static_cast<unsigned>(FPBits::EXP_BIAS + FPBits::FRACTION_LEN);
-  return (x_e + lsb >= UNIT_EXPONENT);
-}
-
-} // namespace
-
 LLVM_LIBC_FUNCTION(double, pow, (double x, double y)) {
-  using FPBits = fputil::FPBits<double>;
-
-  FPBits xbits(x), ybits(y);
-
-  bool x_sign = xbits.sign() == Sign::NEG;
-  bool y_sign = ybits.sign() == Sign::NEG;
-
-  FPBits x_abs = xbits.abs();
-  FPBits y_abs = ybits.abs();
-
-  uint64_t x_mant = xbits.get_mantissa();
-  uint64_t y_mant = ybits.get_mantissa();
-  uint64_t x_u = xbits.uintval();
-  uint64_t x_a = x_abs.uintval();
-  uint64_t y_a = y_abs.uintval();
-
-  double e_x = static_cast<double>(xbits.get_exponent());
-  uint64_t sign = 0;
-
-  ///////// BEGIN - Check exceptional cases ////////////////////////////////////
-  // If x or y is signaling NaN
-  if (x_abs.is_signaling_nan() || y_abs.is_signaling_nan()) {
-    fputil::raise_except_if_required(FE_INVALID);
-    return FPBits::quiet_nan().get_val();
-  }
-
-  // The double precision number that is closest to 1 is (1 - 2^-53), which has
-  //   log2(1 - 2^-53) ~ -1.715...p-53.
-  // So if |y| > |1075 / log2(1 - 2^-53)|, and x is finite:
-  //   |y * log2(x)| = 0 or > 1075.
-  // Hence x^y will either overflow or underflow if x is not zero.
-  if (LIBC_UNLIKELY(y_mant == 0 || y_a > 0x43d7'4910'd52d'3052 ||
-                    x_u == FPBits::one().uintval() ||
-                    x_u >= FPBits::inf().uintval() ||
-                    x_u < FPBits::min_normal().uintval())) {
-    // Exceptional exponents.
-    if (y == 0.0)
-      return 1.0;
-
-    switch (y_a) {
-    case 0x3fe0'0000'0000'0000: { // y = +-0.5
-      // TODO: speed up x^(-1/2) with rsqrt(x) when available.
-      if (LIBC_UNLIKELY(
-              (x == 0.0 || x_u == FPBits::inf(Sign::NEG).uintval()))) {
-        // pow(-0, 1/2) = +0
-        // pow(-inf, 1/2) = +inf
-        // Make sure it works correctly for FTZ/DAZ.
-        return y_sign ? 1.0 / (x * x) : (x * x);
-      }
-      return y_sign ? (1.0 / fputil::sqrt<double>(x)) : fputil::sqrt<double>(x);
-    }
-    case 0x3ff0'0000'0000'0000: // y = +-1.0
-      return y_sign ? (1.0 / x) : x;
-    case 0x4000'0000'0000'0000: // y = +-2.0;
-      return y_sign ? (1.0 / (x * x)) : (x * x);
-    }
-
-    // |y| > |1075 / log2(1 - 2^-53)|.
-    if (y_a > 0x43d7'4910'd52d'3052) {
-      if (y_a >= 0x7ff0'0000'0000'0000) {
-        // y is inf or nan
-        if (y_mant != 0) {
-          // y is NaN
-          // pow(1, NaN) = 1
-          // pow(x, NaN) = NaN
-          return (x_u == FPBits::one().uintval()) ? 1.0 : y;
-        }
-
-        // Now y is +-Inf
-        if (x_abs.is_nan()) {
-          // pow(NaN, +-Inf) = NaN
-          return x;
-        }
-
-        if (x_a == 0x3ff0'0000'0000'0000) {
-          // pow(+-1, +-Inf) = 1.0
-          return 1.0;
-        }
-
-        if (x == 0.0 && y_sign) {
-          // pow(+-0, -Inf) = +inf and raise FE_DIVBYZERO
-          fputil::set_errno_if_required(EDOM);
-          fputil::raise_except_if_required(FE_DIVBYZERO);
-          return FPBits::inf().get_val();
-        }
-        // pow (|x| < 1, -inf) = +inf
-        // pow (|x| < 1, +inf) = 0.0
-        // pow (|x| > 1, -inf) = 0.0
-        // pow (|x| > 1, +inf) = +inf
-        return ((x_a < FPBits::one().uintval()) == y_sign)
-                   ? FPBits::inf().get_val()
-                   : 0.0;
-      }
-      // x^y will overflow / underflow in double precision.  Set y to a
-      // large enough exponent but not too large, so that the computations
-      // won't overflow in double precision.
-      y = y_sign ? -0x1.0p100 : 0x1.0p100;
-    }
-
-    // y is finite and non-zero.
-
-    if (x_u == FPBits::one().uintval()) {
-      // pow(1, y) = 1
-      return 1.0;
-    }
-
-    // TODO: Speed things up with pow(2, y) = exp2(y) and pow(10, y) = exp10(y).
-
-    if (x == 0.0) {
-      bool out_is_neg = x_sign && is_odd_integer(y);
-      if (y_sign) {
-        // pow(0, negative number) = inf
-        fputil::set_errno_if_required(EDOM);
-        fputil::raise_except_if_required(FE_DIVBYZERO);
-        return FPBits::inf(out_is_neg ? Sign::NEG : Sign::POS).get_val();
-      }
-      // pow(0, positive number) = 0
-      return out_is_neg ? -0.0 : 0.0;
-    }
-
-    if (x_a == FPBits::inf().uintval()) {
-      bool out_is_neg = x_sign && is_odd_integer(y);
-      if (y_sign)
-        return out_is_neg ? -0.0 : 0.0;
-      return FPBits::inf(out_is_neg ? Sign::NEG : Sign::POS).get_val();
-    }
-
-    if (x_a > FPBits::inf().uintval()) {
-      // x is NaN.
-      // pow (aNaN, 0) is already taken care above.
-      return x;
-    }
-
-    // Normalize denormal inputs.
-    if (x_a < FPBits::min_normal().uintval()) {
-      e_x -= 64.0;
-      x_mant = FPBits(x * 0x1.0p64).get_mantissa();
-    }
-
-    // x is finite and negative, and y is a finite integer.
-    if (x_sign) {
-      if (is_integer(y)) {
-        x = -x;
-        if (is_odd_integer(y))
-          // sign = -1.0;
-          sign = 0x8000'0000'0000'0000;
-      } else {
-        // pow( negative, non-integer ) = NaN
-        fputil::set_errno_if_required(EDOM);
-        fputil::raise_except_if_required(FE_INVALID);
-        return FPBits::quiet_nan().get_val();
-      }
-    }
-  }
-
-  ///////// END - Check exceptional cases //////////////////////////////////////
-
-  // x^y = 2^( y * log2(x) )
-  //     = 2^( y * ( e_x + log2(m_x) ) )
-  // First we compute log2(x) = e_x + log2(m_x)
-
-  // Extract exponent field of x.
-
-  // Use the highest 7 fractional bits of m_x as the index for look up tables.
-  unsigned idx_x = static_cast<unsigned>(x_mant >> (FPBits::FRACTION_LEN - 7));
-  // Add the hidden bit to the mantissa.
-  // 1 <= m_x < 2
-  FPBits m_x = FPBits(x_mant | 0x3ff0'0000'0000'0000);
-
-  // Reduced argument for log2(m_x):
-  //   dx = r * m_x - 1.
-  // The computation is exact, and -2^-8 <= dx < 2^-7.
-  // Then m_x = (1 + dx) / r, and
-  //   log2(m_x) = log2( (1 + dx) / r )
-  //             = log2(1 + dx) - log2(r).
-
-  // In order for the overall computations x^y = 2^(y * log2(x)) to have the
-  // relative errors < 2^-52 (1ULP), we will need to evaluate the exponent part
-  // y * log2(x) with absolute errors < 2^-52 (or better, 2^-53).  Since the
-  // whole exponent range for double precision is bounded by
-  // |y * log2(x)| < 1076 ~ 2^10, we need to evaluate log2(x) with absolute
-  // errors < 2^-53 * 2^-10 = 2^-63.
-
-  // With that requirement, we use the following degree-6 polynomial
-  // approximation:
-  //   P(dx) ~ log2(1 + dx) / dx
-  // Generated by Sollya with:
-  // > P = fpminimax(log2(1 + x)/x, 6, [|D...|], [-2^-8, 2^-7]); P;
-  // > dirtyinfnorm(log2(1 + x) - x*P, [-2^-8, 2^-7]);
-  //   0x1.d03cc...p-66
-  constexpr double COEFFS[] = {0x1.71547652b82fep0,  -0x1.71547652b82e7p-1,
-                               0x1.ec709dc3b1fd5p-2, -0x1.7154766124215p-2,
-                               0x1.2776bd90259d8p-2, -0x1.ec586c6f3d311p-3,
-                               0x1.9c4775eccf524p-3};
-  // Error: ulp(dx^2) <= (2^-7)^2 * 2^-52 = 2^-66
-  // Extra errors from various computations and rounding directions, the overall
-  // errors we can be bounded by 2^-65.
-
-  double dx;
-  DoubleDouble dx_c0;
-
-  // Perform exact range reduction and exact product dx * c0.
-#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
-  dx = fputil::multiply_add(RD[idx_x], m_x.get_val(), -1.0); // Exact
-  dx_c0 = fputil::exact_mult(COEFFS[0], dx);
-#else
-  double c = FPBits(m_x.uintval() & 0x3fff'e000'0000'0000).get_val();
-  dx = fputil::multiply_add(RD[idx_x], m_x.get_val() - c, CD[idx_x]); // Exact
-  dx_c0 = fputil::exact_mult<double, 28>(dx, COEFFS[0]);              // Exact
-#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
-
-  double dx2 = dx * dx;
-  double c0 = fputil::multiply_add(dx, COEFFS[2], COEFFS[1]);
-  double c1 = fputil::multiply_add(dx, COEFFS[4], COEFFS[3]);
-  double c2 = fputil::multiply_add(dx, COEFFS[6], COEFFS[5]);
-
-  double p = fputil::polyeval(dx2, c0, c1, c2);
-
-  // s = e_x - log2(r) + dx * P(dx)
-  // Absolute error bound:
-  //   |log2(x) - log2_x.hi - log2_x.lo| < 2^-65.
-
-  // Notice that e_x - log2(r).hi is exact, so we perform an exact sum of
-  // e_x - log2(r).hi and the high part of the product dx * c0:
-  //   log2_x_hi.hi + log2_x_hi.lo = e_x - log2(r).hi + (dx * c0).hi
-  DoubleDouble log2_x_hi =
-      fputil::exact_add(e_x + LOG2_R_DD[idx_x].hi, dx_c0.hi);
-  // The low part is dx^2 * p + low part of (dx * c0) + low part of -log2(r).
-  double log2_x_lo =
-      fputil::multiply_add(dx2, p, dx_c0.lo + LOG2_R_DD[idx_x].lo);
-  // Perform accurate sums.
-  DoubleDouble log2_x = fputil::exact_add(log2_x_hi.hi, log2_x_lo);
-  log2_x.lo += log2_x_hi.lo;
-
-  // To compute 2^(y * log2(x)), we break the exponent into 3 parts:
-  //   y * log(2) = hi + mid + lo, where
-  //   hi is an integer
-  //   mid * 2^6 is an integer
-  //   |lo| <= 2^-7
-  // Then:
-  //   x^y = 2^(y * log2(x)) = 2^hi * 2^mid * 2^lo,
-  // In which 2^mid is obtained from a look-up table of size 2^6 = 64 elements,
-  // and 2^lo ~ 1 + lo * P(lo).
-  // Thus, we have:
-  //   hi + mid = 2^-6 * round( 2^6 * y * log2(x) )
-  // If we restrict the output such that |hi| < 150, (hi + mid) uses (8 + 6)
-  // bits, hence, if we use double precision to perform
-  //   round( 2^6 * y * log2(x))
-  // the lo part is bounded by 2^-7 + 2^(-(52 - 14)) = 2^-7 + 2^-38
-
-  // In the following computations:
-  //   y6  = 2^6 * y
-  //   hm  = 2^6 * (hi + mid) = round(2^6 * y * log2(x)) ~ round(y6 * s)
-  //   lo6 = 2^6 * lo = 2^6 * (y - (hi + mid)) = y6 * log2(x) - hm.
-  double y6 = y * 0x1.0p6; // Exact.
-
-  DoubleDouble y6_log2_x = fputil::exact_mult(y6, log2_x.hi);
-  y6_log2_x.lo = fputil::multiply_add(y6, log2_x.lo, y6_log2_x.lo);
-
-  // Check overflow/underflow.
-  double scale = 1.0;
-
-  // |2^(hi + mid) - exp2_hi_mid| <= ulp(exp2_hi_mid) / 2
-  // Clamp the exponent part into smaller range that fits double precision.
-  // For those exponents that are out of range, the final conversion will round
-  // them correctly to inf/max float or 0/min float accordingly.
-  constexpr double UPPER_EXP_BOUND = 512.0 * 0x1.0p6;
-  if (LIBC_UNLIKELY(FPBits(y6_log2_x.hi).abs().get_val() >= UPPER_EXP_BOUND)) {
-    if (FPBits(y6_log2_x.hi).sign() == Sign::POS) {
-      scale = 0x1.0p512;
-      y6_log2_x.hi -= 512.0 * 64.0;
-      if (y6_log2_x.hi > 513.0 * 64.0)
-        y6_log2_x.hi = 513.0 * 64.0;
-    } else {
-      scale = 0x1.0p-512;
-      y6_log2_x.hi += 512.0 * 64.0;
-      if (y6_log2_x.hi < (-1076.0 + 512.0) * 64.0)
-        y6_log2_x.hi = -564.0 * 64.0;
-    }
-  }
-
-  double hm = fputil::nearest_integer(y6_log2_x.hi);
-
-  // lo6 = 2^6 * lo.
-  double lo6_hi = y6_log2_x.hi - hm;
-  double lo6 = lo6_hi + y6_log2_x.lo;
-
-  int hm_i = static_cast<int>(hm);
-  unsigned idx_y = static_cast<unsigned>(hm_i) & 0x3f;
-
-  // 2^hi
-  int64_t exp2_hi_i = static_cast<int64_t>(
-      static_cast<uint64_t>(static_cast<int64_t>(hm_i >> 6))
-      << FPBits::FRACTION_LEN);
-  // 2^mid
-  int64_t exp2_mid_hi_i =
-      static_cast<int64_t>(FPBits(EXP2_MID1[idx_y].hi).uintval());
-  int64_t exp2_mid_lo_i =
-      static_cast<int64_t>(FPBits(EXP2_MID1[idx_y].mid).uintval());
-  // (-1)^sign * 2^hi * 2^mid
-  // Error <= 2^hi * 2^-53
-  uint64_t exp2_hm_hi_i =
-      static_cast<uint64_t>(exp2_hi_i + exp2_mid_hi_i) + sign;
-  // The low part could be 0.
-  uint64_t exp2_hm_lo_i =
-      idx_y != 0 ? static_cast<uint64_t>(exp2_hi_i + exp2_mid_lo_i) + sign
-                 : sign;
-  double exp2_hm_hi = FPBits(exp2_hm_hi_i).get_val();
-  double exp2_hm_lo = FPBits(exp2_hm_lo_i).get_val();
-
-  // Degree-5 polynomial approximation P(lo6) ~ 2^(lo6 / 2^6) = 2^(lo).
-  // Generated by Sollya with:
-  // > P = fpminimax(2^(x/64), 5, [|1, D...|], [-2^-1, 2^-1]);
-  // > dirtyinfnorm(2^(x/64) - P, [-0.5, 0.5]);
-  // 0x1.a2b77e618f5c4c176fd11b7659016cde5de83cb72p-60
-  constexpr double EXP2_COEFFS[] = {0x1p0,
-                                    0x1.62e42fefa39efp-7,
-                                    0x1.ebfbdff82a23ap-15,
-                                    0x1.c6b08d7076268p-23,
-                                    0x1.3b2ad33f8b48bp-31,
-                                    0x1.5d870c4d84445p-40};
-
-  double lo6_sqr = lo6 * lo6;
-
-  double d0 = fputil::multiply_add(lo6, EXP2_COEFFS[2], EXP2_COEFFS[1]);
-  double d1 = fputil::multiply_add(lo6, EXP2_COEFFS[4], EXP2_COEFFS[3]);
-  double pp = fputil::polyeval(lo6_sqr, d0, d1, EXP2_COEFFS[5]);
-
-  double r = fputil::multiply_add(exp2_hm_hi * lo6, pp, exp2_hm_lo);
-  r += exp2_hm_hi;
-
-  return r * scale;
+  return math::pow(x, y);
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/shared/CMakeLists.txt b/libc/test/shared/CMakeLists.txt
index 61625c0bc0444..1173b32e11e6a 100644
--- a/libc/test/shared/CMakeLists.txt
+++ b/libc/test/shared/CMakeLists.txt
@@ -70,6 +70,7 @@ add_fp_unittest(
     libc.src.__support.math.ldexpf128
     libc.src.__support.math.ldexpf16
     libc.src.__support.math.llogbf
+    libc.src.__support.math.pow
     libc.src.__support.math.rsqrtf
     libc.src.__support.math.rsqrtf16
     libc.src.__support.math.sin
diff --git a/libc/test/shared/shared_math_test.cpp b/libc/test/shared/shared_math_test.cpp
index 49047631fb227..428b17c7fbe89 100644
--- a/libc/test/shared/shared_math_test.cpp
+++ b/libc/test/shared/shared_math_test.cpp
@@ -98,6 +98,7 @@ TEST(LlvmLibcSharedMathTest, AllDouble) {
   EXPECT_FP_EQ(0x0p+0, LIBC_NAMESPACE::shared::expm1(0.0));
   EXPECT_FP_EQ(0x0p+0, LIBC_NAMESPACE::shared::fsqrt(0.0));
   EXPECT_FP_EQ(0x0p+0, LIBC_NAMESPACE::shared::log(1.0));
+  EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::shared::pow(0.0, 0.0));
   EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::shared::sin(0.0));
 }
 
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index 3d9491e8baa0c..91c1914ceeaed 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -2957,6 +2957,25 @@ libc_support_library(
     ],
 )
 
+libc_support_library(
+    name = "__support_math_pow",
+    hdrs = ["src/__support/math/pow.h"],
+    deps = [
+        ":__support_cpp_bit",
+        ":__support_fputil_fenv_impl",
+        ":__support_fputil_fp_bits",
+        ":__support_fputil_multiply_add",
+        ":__support_fputil_nearest_integer",
+        ":__support_fputil_polyeval",
+        ":__support_fputil_sqrt",
+        ":__support_fputil_triple_double",
+        ":__support_macros_optimization",
+        ":__support_math_common_constants",
+        ":__support_math_exp10f",
+        ":__support_math_exp2f",
+    ],
+)
+
 libc_support_library(
     name = "__support_math_exp_constants",
     hdrs = ["src/__support/math/exp_constants.h"],
@@ -4757,11 +4776,8 @@ libc_math_function(name = "nextupf16")
 libc_math_function(
     name = "pow",
     additional_deps = [
-        ":__support_fputil_double_double",
-        ":__support_fputil_nearest_integer",
-        ":__support_fputil_polyeval",
-        ":__support_fputil_sqrt",
-        ":__support_math_common_constants",
+        ":__support_math_pow",
+        ":errno",
     ],
 )
 



More information about the libc-commits mailing list