[libc-commits] [libc] [libc][math] Implement a code-size optimized version of powf. (PR #190984)

Wed Apr 8 07:54:30 PDT 2026

https://github.com/lntue created https://github.com/llvm/llvm-project/pull/190984

Code size of powf on armv8m:

Before:
```
$ ls -l libc/src/math/generic/CMakeFiles/libc.src.math.generic.powf.dir/
total 12
-rw-r----- 1 lntue primarygroup 9812 Apr  8 14:51 powf.cpp.obj
```

After:
```
$ ls -l libc/src/math/generic/CMakeFiles/libc.src.math.generic.powf.dir/
total 8
-rw-r----- 1 lntue primarygroup 4700 Apr  8 14:50 powf.cpp.obj
```

>From 288fd199f819d522191e4a60e85f3060f4890c2d Mon Sep 17 00:00:00 2001
From: Tue Ly <lntue.h at gmail.com>
Date: Wed, 8 Apr 2026 14:46:50 +0000
Subject: [PATCH] [libc][math] Implement a code-size optimized version of powf.

---
 libc/src/__support/math/powf.h              |  24 +++-
 libc/src/__support/math/powf_small_tables.h | 120 ++++++++++++++++++++
 2 files changed, 140 insertions(+), 4 deletions(-)
 create mode 100644 libc/src/__support/math/powf_small_tables.h

diff --git a/libc/src/__support/math/powf.h b/libc/src/__support/math/powf.h
index 20f59b59266d8..39439560db4ff 100644
--- a/libc/src/__support/math/powf.h
+++ b/libc/src/__support/math/powf.h
@@ -9,10 +9,22 @@
 #ifndef LLVM_LIBC_SRC___SUPPORT_MATH_POWF_H
 #define LLVM_LIBC_SRC___SUPPORT_MATH_POWF_H
 
+#include "src/__support/macros/optimization.h"
+
+#if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS) &&                               \
+    defined(LIBC_MATH_HAS_SMALL_TABLES)
+
+#include "src/__support/math/powf_small_tables.h"
+
+#else
+
 #include "common_constants.h" // Lookup tables EXP_M1 and EXP_M2.
 #include "exp10f.h"           // Speedup for powf(10, y) = exp10f(y)
 #include "exp2f.h"            // Speedup for powf(2, y) = exp2f(y)
 #include "exp_constants.h"
+
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS && LIBC_MATH_HAS_SMALL_TABLES
+
 #include "src/__support/CPP/bit.h"
 #include "src/__support/FPUtil/FPBits.h"
 #include "src/__support/FPUtil/PolyEval.h"
@@ -23,7 +35,6 @@
 #include "src/__support/FPUtil/triple_double.h"
 #include "src/__support/common.h"
 #include "src/__support/macros/config.h"
-#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
 
 namespace LIBC_NAMESPACE_DECL {
 
@@ -34,8 +45,6 @@ namespace powf_internal {
 using fputil::DoubleDouble;
 using fputil::TripleDouble;
 
-using namespace common_constants_internal;
-
 #ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
 alignas(16) LIBC_INLINE_VAR constexpr DoubleDouble LOG2_R_DD[128] = {
     {0.0, 0.0},
@@ -654,7 +663,7 @@ LIBC_INLINE double powf_double_double(int idx_x, double dx, double y6,
 LIBC_INLINE float powf(float x, float y) {
   using namespace powf_internal;
   using FloatBits = typename fputil::FPBits<float>;
-  using DoubleBits = typename fputil::FPBits<double>;
+  using DoubleBits [[maybe_unused]] = typename fputil::FPBits<double>;
 
   FloatBits xbits(x), ybits(y);
 
@@ -847,6 +856,11 @@ LIBC_INLINE float powf(float x, float y) {
 
   ///////// END - Check exceptional cases //////////////////////////////////////
 
+#if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS) &&                               \
+    defined(LIBC_MATH_HAS_SMALL_TABLES)
+  return powf_small_tables(x, ex, sign, y);
+#else
+
   // x^y = 2^( y * log2(x) )
   //     = 2^( y * ( e_x + log2(m_x) ) )
   // First we compute log2(x) = e_x + log2(m_x)
@@ -1033,6 +1047,8 @@ LIBC_INLINE float powf(float x, float y) {
 
   return static_cast<float>(r_dd);
 #endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS && LIBC_MATH_HAS_SMALL_TABLES
 }
 
 } // namespace math
diff --git a/libc/src/__support/math/powf_small_tables.h b/libc/src/__support/math/powf_small_tables.h
new file mode 100644
index 0000000000000..3c5abbe9190ce
--- /dev/null
+++ b/libc/src/__support/math/powf_small_tables.h
@@ -0,0 +1,120 @@
+//===-- Implementation header for powf using less memory --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_POWF_SMALL_TABLES_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_POWF_SMALL_TABLES_H
+
+#include "src/__support/CPP/bit.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+namespace powf_internal {
+
+// Small-footprint core of powf: computes x^y = 2^(y * log2(x)) in double
+// precision from two polynomial approximations.
+//   x    - base; its exponent and fraction fields are read directly.
+//          NOTE(review): presumably positive and normalized by the caller.
+//   ex   - exponent offset to which the exponent field of x is added.
+//   sign - bits OR-ed into the bit pattern of the double 2^hi.
+//   y    - exponent.
+// NOTE(review): e_hi is not range-checked; presumably the caller bounds y.
+LIBC_INLINE LIBC_CONSTEXPR float powf_small_tables(float x, int ex,
+                                                   uint64_t sign, float y) {
+  using FloatBits = fputil::FPBits<float>;
+  using DoubleBits = fputil::FPBits<double>;
+  constexpr double ONE_OVER_SQRT2 = 0x1.6a09e667f3bcdp-1;
+
+  // x^y = 2^( y * log2(x) )
+  //     = 2^( y * ( e_x + log2(m_x) ) )
+  // First we compute log2(x) = e_x + log2(m_x)
+  uint32_t x_u = FloatBits(x).uintval();
+  double yd = static_cast<double>(y);
+
+  // Extract exponent field of x.
+  ex += (x_u >> FloatBits::FRACTION_LEN);
+  double e_x = static_cast<double>(ex);
+
+  // Add the hidden bit to the mantissa: 1 <= m_x < 2.
+  uint32_t x_mant = (x_u & FloatBits::FRACTION_MASK);
+  double m_x = static_cast<double>(cpp::bit_cast<float>(x_mant | 0x3f800000));
+  // Reduce to 1 <= m_x <= sqrt(2). 0x0035'04f3 is the fraction field of the
+  // float nearest sqrt(2) (0x3fb5'04f3); a larger threshold would push dx out
+  // of the interval on which the log2 polynomial below was fitted.
+  if (x_mant > 0x0035'04f3) {
+    e_x += 0.5;
+    m_x *= ONE_OVER_SQRT2;
+  }
+  // 0 <= dx <= sqrt(2) - 1.
+  double dx = m_x - 1.0;
+
+  // Degree-13 polynomial approximation:
+  //   dx * P(dx) ~ log2(1 + dx)
+  // Generated by Sollya with:
+  // > P = fpminimax(log2(1 + x)/x, 13, [|D...|], [0, sqrt(2) - 1]);
+  // > dirtyinfnorm((log2(1 + x) - x*P)/log2(1 + x), [0, sqrt(2) - 1]);
+  //   0x1.b2d...p-53
+  constexpr double LOG2_COEFFS[] = {
+      0x1.71547652b82fdp0,   -0x1.71547652b7a2ap-1, 0x1.ec709dc2edfa6p-2,
+      -0x1.71547626a9d98p-2, 0x1.2776bf5f6f40ep-2,  -0x1.ec6fbbf289ce3p-3,
+      0x1.a60bf904470a7p-3,  -0x1.70ef61b01fc1ep-3, 0x1.45d3270454507p-3,
+      -0x1.1c5fc05b06e8fp-3, 0x1.d0f57944a937fp-4,  -0x1.413e22be24d32p-4,
+      0x1.3c84b66491ccp-5,   -0x1.3df9cfe5e602ep-7};
+
+  double dx2 = dx * dx;
+  double c0 = fputil::multiply_add(dx, LOG2_COEFFS[1], LOG2_COEFFS[0]);
+  double c1 = fputil::multiply_add(dx, LOG2_COEFFS[3], LOG2_COEFFS[2]);
+  double c2 = fputil::multiply_add(dx, LOG2_COEFFS[5], LOG2_COEFFS[4]);
+  double c3 = fputil::multiply_add(dx, LOG2_COEFFS[7], LOG2_COEFFS[6]);
+  double c4 = fputil::multiply_add(dx, LOG2_COEFFS[9], LOG2_COEFFS[8]);
+  double c5 = fputil::multiply_add(dx, LOG2_COEFFS[11], LOG2_COEFFS[10]);
+  double c6 = fputil::multiply_add(dx, LOG2_COEFFS[13], LOG2_COEFFS[12]);
+  double dx4 = dx2 * dx2;
+  double d0 = fputil::multiply_add(dx2, c1, c0);
+  double d1 = fputil::multiply_add(dx2, c3, c2);
+  double d2 = fputil::multiply_add(dx2, c5, c4);
+  double p = fputil::polyeval(dx4, d0, d1, d2, c6);
+  // u ~ y * log2(x).
+  double u = yd * fputil::multiply_add(dx, p, e_x);
+
+  // Split u = hi + lo, hi integral; 2^hi is assembled from its exponent bits.
+  double hi = fputil::nearest_integer(u);
+  double lo = u - hi;
+  int e_hi = static_cast<int>(hi) + DoubleBits::EXP_BIAS;
+  double exp_hi = cpp::bit_cast<double>(
+      (static_cast<uint64_t>(e_hi) << DoubleBits::FRACTION_LEN) | sign);
+  // Degree-6 polynomial approximation: 1 + lo * Q(lo) ~ 2^lo.
+  // Generated by Sollya with:
+  // > P = fpminimax(2^x, 6, [|1, D...|], [-0.5, 0.5]);
+  // > dirtyinfnorm(2^x - P, [-0.5, 0.5]);
+  // 0x1.5f7...p-29
+  constexpr double EXP2_COEFFS[] = {
+      0x1.62e430c7b13a8p-1, 0x1.ebfbdd2f82f6fp-3, 0x1.c6aed4f186f34p-5,
+      0x1.3b2c96c9aa336p-7, 0x1.5f4553ff53f9p-10, 0x1.4278e5fa9de78p-13};
+  double lo2 = lo * lo;
+  double f0 = fputil::multiply_add(lo, EXP2_COEFFS[1], EXP2_COEFFS[0]);
+  double f1 = fputil::multiply_add(lo, EXP2_COEFFS[3], EXP2_COEFFS[2]);
+  double f2 = fputil::multiply_add(lo, EXP2_COEFFS[5], EXP2_COEFFS[4]);
+  double pp = fputil::polyeval(lo2, f0, f1, f2);
+  double r = fputil::multiply_add(lo, pp, 1.0);
+  return static_cast<float>(r * exp_hi);
+}
+
+} // namespace powf_internal
+} // namespace math
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_POWF_SMALL_TABLES_H