[libc-commits] [libc] [libc][math] Fix exact cases for powf. (PR #91488)

Wed May 8 08:33:53 PDT 2024

llvmbot wrote:




@llvm/pr-subscribers-libc

Author: None (lntue)

<details>
<summary>Changes</summary>

It was reported from the CORE-MATH project that the `powf` implementation did not round correctly when `x^y` is either exact for exactly half-way.

This PR deals with the potential exact cases when `y` is an integer `2 < y < 25`.  In such cases, the results of `x^y` is exactly representable in double precision.

---
Full diff: https://github.com/llvm/llvm-project/pull/91488.diff


2 Files Affected:

- (modified) libc/src/math/generic/powf.cpp (+21-2) 
- (modified) libc/test/src/math/powf_test.cpp (+13-6) 


``````````diff

diff --git a/libc/src/math/generic/powf.cpp b/libc/src/math/generic/powf.cpp
index 0450ffd711fff..59efc3f424c76 100644
--- a/libc/src/math/generic/powf.cpp
+++ b/libc/src/math/generic/powf.cpp
@@ -528,7 +528,7 @@ LLVM_LIBC_FUNCTION(float, powf, (float x, float y)) {
   // So if |y| > 151 * 2^24, and x is finite:
   //   |y * log2(x)| = 0 or > 151.
   // Hence x^y will either overflow or underflow if x is not zero.
-  if (LIBC_UNLIKELY((y_abs & 0x007f'ffff) == 0) || (y_abs > 0x4f170000)) {
+  if (LIBC_UNLIKELY((y_abs & 0x0007'ffff) == 0) || (y_abs > 0x4f170000)) {
     // Exceptional exponents.
     switch (y_abs) {
     case 0x0000'0000: { // y = +-0.0f
@@ -572,6 +572,26 @@ LLVM_LIBC_FUNCTION(float, powf, (float x, float y)) {
         // case 0xbf00'0000:  // pow(x, -1/2) = rsqrt(x)
         //   return rsqrt(x);
       }
+      if (is_integer(y) && (y_u > 0x4000'0000) && (y_u <= 0x41c0'0000)) {
+        // Check for exact cases when 2 < y < 25 and y is an integer.
+        int msb =
+            (x_abs == 0) ? (FloatBits::TOTAL_LEN - 2) : cpp::countl_zero(x_abs);
+        msb = (msb > FloatBits::EXP_LEN) ? msb : FloatBits::EXP_LEN;
+        int lsb = (x_abs == 0) ? 0 : cpp::countr_zero(x_abs);
+        lsb = (lsb > FloatBits::FRACTION_LEN) ? FloatBits::FRACTION_LEN : lsb;
+        int extra_bits = FloatBits::TOTAL_LEN - 2 - lsb - msb;
+        int iter = static_cast<int>(y);
+
+        if (extra_bits * iter <= FloatBits::FRACTION_LEN + 2) {
+          // The result is either exact or exactly half-way.
+          // But it is exactly representable in double precision.
+          double x_d = static_cast<double>(x);
+          double result = x_d;
+          for (int i = 1; i < iter; ++i)
+            result *= x_d;
+          return static_cast<float>(result);
+        }
+      }
       if (y_abs > 0x4f17'0000) {
         if (y_abs > 0x7f80'0000) {
           // y is NaN
@@ -834,7 +854,6 @@ LLVM_LIBC_FUNCTION(float, powf, (float x, float y)) {
   return static_cast<float>(
              powf_double_double(idx_x, dx, y6, lo6_hi, exp2_hi_mid_dd)) +
          0.0f;
-  // return static_cast<float>(r);
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/test/src/math/powf_test.cpp b/libc/test/src/math/powf_test.cpp
index 69135593cd32c..797913e5b7eef 100644
--- a/libc/test/src/math/powf_test.cpp
+++ b/libc/test/src/math/powf_test.cpp
@@ -22,14 +22,21 @@ using LIBC_NAMESPACE::testing::tlog;
 namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 
 TEST_F(LlvmLibcPowfTest, TrickyInputs) {
-  constexpr int N = 11;
+  constexpr int N = 13;
   constexpr mpfr::BinaryInput<float> INPUTS[N] = {
-      {0x1.290bbp-124f, 0x1.1e6d92p-25f}, {0x1.2e9fb6p+5f, -0x1.1b82b6p-18f},
-      {0x1.6877f6p+60f, -0x1.75f1c6p-4f}, {0x1.0936acp-63f, -0x1.55200ep-15f},
-      {0x1.d6d72ap+43f, -0x1.749ccap-5f}, {0x1.4afb2ap-40f, 0x1.063198p+0f},
-      {0x1.0124dep+0f, -0x1.fdb016p+9f},  {0x1.1058p+0f, 0x1.ap+64f},
-      {0x1.1058p+0f, -0x1.ap+64f},        {0x1.1058p+0f, 0x1.ap+64f},
+      {0x1.290bbp-124f, 0x1.1e6d92p-25f},
+      {0x1.2e9fb6p+5f, -0x1.1b82b6p-18f},
+      {0x1.6877f6p+60f, -0x1.75f1c6p-4f},
+      {0x1.0936acp-63f, -0x1.55200ep-15f},
+      {0x1.d6d72ap+43f, -0x1.749ccap-5f},
+      {0x1.4afb2ap-40f, 0x1.063198p+0f},
+      {0x1.0124dep+0f, -0x1.fdb016p+9f},
+      {0x1.1058p+0f, 0x1.ap+64f},
+      {0x1.1058p+0f, -0x1.ap+64f},
+      {0x1.1058p+0f, 0x1.ap+64f},
       {0x1.fa32d4p-1f, 0x1.67a62ep+12f},
+      {-0x1.8p-49, 0x1.8p+1},
+      {0x1.8p-48, 0x1.8p+1},
   };
 
   for (int i = 0; i < N; ++i) {

``````````

</details>


https://github.com/llvm/llvm-project/pull/91488