[libc-commits] [libc] [libc][math] Implement a code-size optimized version of powf. (PR #190984)
via libc-commits
libc-commits at lists.llvm.org
Wed Apr 8 07:54:30 PDT 2026
https://github.com/lntue created https://github.com/llvm/llvm-project/pull/190984
Code size of powf on armv8m:
Before:
```
$ ls -l libc/src/math/generic/CMakeFiles/libc.src.math.generic.powf.dir/
total 12
-rw-r----- 1 lntue primarygroup 9812 Apr 8 14:51 powf.cpp.obj
```
After:
```
$ ls -l libc/src/math/generic/CMakeFiles/libc.src.math.generic.powf.dir/
total 8
-rw-r----- 1 lntue primarygroup 4700 Apr 8 14:50 powf.cpp.obj
```
>From 288fd199f819d522191e4a60e85f3060f4890c2d Mon Sep 17 00:00:00 2001
From: Tue Ly <lntue.h at gmail.com>
Date: Wed, 8 Apr 2026 14:46:50 +0000
Subject: [PATCH] [libc][math] Implement a code-size optimized version of powf.
---
libc/src/__support/math/powf.h | 24 +++-
libc/src/__support/math/powf_small_tables.h | 120 ++++++++++++++++++++
2 files changed, 140 insertions(+), 4 deletions(-)
create mode 100644 libc/src/__support/math/powf_small_tables.h
diff --git a/libc/src/__support/math/powf.h b/libc/src/__support/math/powf.h
index 20f59b59266d8..39439560db4ff 100644
--- a/libc/src/__support/math/powf.h
+++ b/libc/src/__support/math/powf.h
@@ -9,10 +9,22 @@
#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_POWF_H
#define LLVM_LIBC_SRC___SUPPORT_MATH_POWF_H
+#include "src/__support/macros/optimization.h"
+
+#if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS) && \
+ defined(LIBC_MATH_HAS_SMALL_TABLES)
+
+#include "src/__support/math/powf_small_tables.h"
+
+#else
+
#include "common_constants.h" // Lookup tables EXP_M1 and EXP_M2.
#include "exp10f.h" // Speedup for powf(10, y) = exp10f(y)
#include "exp2f.h" // Speedup for powf(2, y) = exp2f(y)
#include "exp_constants.h"
+
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS && LIBC_MATH_HAS_SMALL_TABLES
+
#include "src/__support/CPP/bit.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/PolyEval.h"
@@ -23,7 +35,6 @@
#include "src/__support/FPUtil/triple_double.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
-#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
namespace LIBC_NAMESPACE_DECL {
@@ -34,8 +45,6 @@ namespace powf_internal {
using fputil::DoubleDouble;
using fputil::TripleDouble;
-using namespace common_constants_internal;
-
#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
alignas(16) LIBC_INLINE_VAR constexpr DoubleDouble LOG2_R_DD[128] = {
{0.0, 0.0},
@@ -654,7 +663,7 @@ LIBC_INLINE double powf_double_double(int idx_x, double dx, double y6,
LIBC_INLINE float powf(float x, float y) {
using namespace powf_internal;
using FloatBits = typename fputil::FPBits<float>;
- using DoubleBits = typename fputil::FPBits<double>;
+ using DoubleBits [[maybe_unused]] = typename fputil::FPBits<double>;
FloatBits xbits(x), ybits(y);
@@ -847,6 +856,11 @@ LIBC_INLINE float powf(float x, float y) {
///////// END - Check exceptional cases //////////////////////////////////////
+#if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS) && \
+ defined(LIBC_MATH_HAS_SMALL_TABLES)
+ return powf_small_tables(x, ex, sign, y);
+#else
+
// x^y = 2^( y * log2(x) )
// = 2^( y * ( e_x + log2(m_x) ) )
// First we compute log2(x) = e_x + log2(m_x)
@@ -1033,6 +1047,8 @@ LIBC_INLINE float powf(float x, float y) {
return static_cast<float>(r_dd);
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS && LIBC_MATH_HAS_SMALL_TABLES
}
} // namespace math
diff --git a/libc/src/__support/math/powf_small_tables.h b/libc/src/__support/math/powf_small_tables.h
new file mode 100644
index 0000000000000..3c5abbe9190ce
--- /dev/null
+++ b/libc/src/__support/math/powf_small_tables.h
@@ -0,0 +1,120 @@
+//===-- Implementation header for powf using less memory --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_POWF_SMALL_TABLES_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_POWF_SMALL_TABLES_H
+
+#include "src/__support/CPP/bit.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+namespace powf_internal {
+
+LIBC_INLINE LIBC_CONSTEXPR float powf_small_tables(float x, int ex,
+ uint64_t sign, float y) {
+ using FloatBits = fputil::FPBits<float>;
+ using DoubleBits = fputil::FPBits<double>;
+
+ constexpr double ONE_OVER_SQRT2 = 0x1.6a09e667f3bcdp-1;
+
+ // x^y = 2^( y * log2(x) )
+ // = 2^( y * ( e_x + log2(m_x) ) )
+ // First we compute log2(x) = e_x + log2(m_x)
+ uint32_t x_u = FloatBits(x).uintval();
+
+ double yd = static_cast<double>(y);
+
+ // Extract exponent field of x.
+ ex += (x_u >> FloatBits::FRACTION_LEN);
+ double e_x = static_cast<double>(ex);
+
+ // Add the hidden bit to the mantissa.
+ // 1 <= m_x < 2
+ uint32_t x_mant = (x_u & FloatBits::FRACTION_MASK);
+ double m_x = static_cast<double>(cpp::bit_cast<float>(x_mant | 0x3f800000));
+ // Reduce to 1 <= mx <= sqrt(2).
+ if (x_mant > 0x0045'04f3) {
+ e_x += 0.5;
+ m_x *= ONE_OVER_SQRT2;
+ }
+ // 0 <= dx <= sqrt(2) - 1.
+ double dx = m_x - 1.0;
+
+ // Degree-13 polynomial approximation:
+ // dx * P(dx) ~ log2(1 + dx)
+ // Generated by Sollya with:
+ // > P = fpminimax(log2(1 + x)/x, 13, [|D...|], [0, sqrt(2) - 1]);
+ // > dirtyinfnorm((log2(1 + x) - x*P)/log2(1 + x), [0, sqrt(2) - 1]);
+ // 0x1.b2d...p-53
+ constexpr double LOG2_COEFFS[] = {
+ 0x1.71547652b82fdp0, -0x1.71547652b7a2ap-1, 0x1.ec709dc2edfa6p-2,
+ -0x1.71547626a9d98p-2, 0x1.2776bf5f6f40ep-2, -0x1.ec6fbbf289ce3p-3,
+ 0x1.a60bf904470a7p-3, -0x1.70ef61b01fc1ep-3, 0x1.45d3270454507p-3,
+ -0x1.1c5fc05b06e8fp-3, 0x1.d0f57944a937fp-4, -0x1.413e22be24d32p-4,
+ 0x1.3c84b66491ccp-5, -0x1.3df9cfe5e602ep-7};
+
+ double dx2 = dx * dx;
+ double c0 = fputil::multiply_add(dx, LOG2_COEFFS[1], LOG2_COEFFS[0]);
+ double c1 = fputil::multiply_add(dx, LOG2_COEFFS[3], LOG2_COEFFS[2]);
+ double c2 = fputil::multiply_add(dx, LOG2_COEFFS[5], LOG2_COEFFS[4]);
+ double c3 = fputil::multiply_add(dx, LOG2_COEFFS[7], LOG2_COEFFS[6]);
+ double c4 = fputil::multiply_add(dx, LOG2_COEFFS[9], LOG2_COEFFS[8]);
+ double c5 = fputil::multiply_add(dx, LOG2_COEFFS[11], LOG2_COEFFS[10]);
+ double c6 = fputil::multiply_add(dx, LOG2_COEFFS[13], LOG2_COEFFS[12]);
+
+ double dx4 = dx2 * dx2;
+ double d0 = fputil::multiply_add(dx2, c1, c0);
+ double d1 = fputil::multiply_add(dx2, c3, c2);
+ double d2 = fputil::multiply_add(dx2, c5, c4);
+
+ double p = fputil::polyeval(dx4, d0, d1, d2, c6);
+ // u ~ y * log2(x).
+ double u = yd * fputil::multiply_add(dx, p, e_x);
+
+ double hi = fputil::nearest_integer(u);
+ double lo = u - hi;
+ int e_hi = static_cast<int>(hi) + DoubleBits::EXP_BIAS;
+ double exp_hi = cpp::bit_cast<double>(
+ (static_cast<uint64_t>(e_hi) << DoubleBits::FRACTION_LEN) | sign);
+ // Degree-6 polynomial approximation P(lo6) ~ 2^(lo6 / 2^6) = 2^(lo).
+ // Generated by Sollya with:
+ // > P = fpminimax(2^x, 6, [|1, D...|], [-0.5, 0.5]);
+ // > dirtyinfnorm(2^x - P, [-0.5, 0.5]);
+ // 0x1.5f7...p-29
+ constexpr double EXP2_COEFFS[] = {
+ 0x1.62e430c7b13a8p-1, 0x1.ebfbdd2f82f6fp-3, 0x1.c6aed4f186f34p-5,
+ 0x1.3b2c96c9aa336p-7, 0x1.5f4553ff53f9p-10, 0x1.4278e5fa9de78p-13};
+
+ double lo2 = lo * lo;
+ double f0 = fputil::multiply_add(lo, EXP2_COEFFS[1], EXP2_COEFFS[0]);
+ double f1 = fputil::multiply_add(lo, EXP2_COEFFS[3], EXP2_COEFFS[2]);
+ double f2 = fputil::multiply_add(lo, EXP2_COEFFS[5], EXP2_COEFFS[4]);
+
+ double pp = fputil::polyeval(lo2, f0, f1, f2);
+
+ double r = fputil::multiply_add(lo, pp, 1.0);
+
+ double result = r * exp_hi;
+
+ return static_cast<float>(result);
+}
+
+} // namespace powf_internal
+} // namespace math
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_POWF_SMALL_TABLES_H
More information about the libc-commits
mailing list