[libc-commits] [libc] 7dd4ce4 - [libc][stdfix] Fix overflow problem for fixed point sqrt when the inputs are close to max. (#90558)

Tue Apr 30 10:22:31 PDT 2024

Author: lntue
Date: 2024-04-30T13:22:27-04:00
New Revision: 7dd4ce484c8913ced124f2f62ac4c3eaafa9ef5f

URL: https://github.com/llvm/llvm-project/commit/7dd4ce484c8913ced124f2f62ac4c3eaafa9ef5f
DIFF: https://github.com/llvm/llvm-project/commit/7dd4ce484c8913ced124f2f62ac4c3eaafa9ef5f.diff

LOG: [libc][stdfix] Fix overflow problem for fixed point sqrt when the inputs are close to max. (#90558)

Fixes https://github.com/llvm/llvm-project/issues/89668

Added: 
    

Modified: 
    libc/src/__support/fixed_point/sqrt.h
    libc/test/src/stdfix/ISqrtTest.h
    libc/test/src/stdfix/SqrtTest.h
    libc/test/src/stdfix/uksqrtui_test.cpp

Removed: 
    


################################################################################
diff  --git a/libc/src/__support/fixed_point/sqrt.h b/libc/src/__support/fixed_point/sqrt.h
index 4ec016ceab0028..982e318e2d1e33 100644

--- a/libc/src/__support/fixed_point/sqrt.h
+++ b/libc/src/__support/fixed_point/sqrt.h
@@ -55,18 +55,22 @@ template <> struct SqrtConfig<unsigned fract> {
   // Linear approximation for the initial values, with errors bounded by:
   //   max(1.5 * 2^-11, eps)
   // Generated with Sollya:
-  // > for i from 4 to 15 do {
+  // > for i from 4 to 14 do {
   //     P = fpminimax(sqrt(x), 1, [|16, 16|], [i * 2^-4, (i + 1)*2^-4],
   //                   fixed, absolute);
   //     print("{", coeff(P, 1), "ur,", coeff(P, 0), "ur},");
   //   };
+  // For the last interval [15/16, 1), we choose the linear function Q such that
+  //   Q(1) = 1 and Q(15/16) = P(15/16),
+  // where P is the polynomial generated by Sollya above for [14/16, 15/16].
+  // This is to prevent overflow in the last interval [15/16, 1).
   static constexpr Type FIRST_APPROX[12][2] = {
       {0x1.e378p-1ur, 0x1.0ebp-2ur},  {0x1.b512p-1ur, 0x1.2b94p-2ur},
       {0x1.91fp-1ur, 0x1.45dcp-2ur},  {0x1.7622p-1ur, 0x1.5e24p-2ur},
       {0x1.5f5ap-1ur, 0x1.74e4p-2ur}, {0x1.4c58p-1ur, 0x1.8a4p-2ur},
       {0x1.3c1ep-1ur, 0x1.9e84p-2ur}, {0x1.2e0cp-1ur, 0x1.b1d8p-2ur},
       {0x1.21aap-1ur, 0x1.c468p-2ur}, {0x1.16bap-1ur, 0x1.d62cp-2ur},
-      {0x1.0cfp-1ur, 0x1.e74cp-2ur},  {0x1.0418p-1ur, 0x1.f7ep-2ur},
+      {0x1.0cfp-1ur, 0x1.e74cp-2ur},  {0x1.039p-1ur, 0x1.f8ep-2ur},
   };
 };
 
@@ -77,11 +81,15 @@ template <> struct SqrtConfig<unsigned long fract> {
   // Linear approximation for the initial values, with errors bounded by:
   //   max(1.5 * 2^-11, eps)
   // Generated with Sollya:
-  // > for i from 4 to 15 do {
+  // > for i from 4 to 14 do {
   //     P = fpminimax(sqrt(x), 1, [|32, 32|], [i * 2^-4, (i + 1)*2^-4],
   //                   fixed, absolute);
   //     print("{", coeff(P, 1), "ulr,", coeff(P, 0), "ulr},");
   //   };
+  // For the last interval [15/16, 1), we choose the linear function Q such that
+  //   Q(1) = 1 and Q(15/16) = P(15/16),
+  // where P is the polynomial generated by Sollya above for [14/16, 15/16].
+  // This is to prevent overflow in the last interval [15/16, 1).
   static constexpr Type FIRST_APPROX[12][2] = {
       {0x1.e3779b98p-1ulr, 0x1.0eaff788p-2ulr},
       {0x1.b5167872p-1ulr, 0x1.2b908ad4p-2ulr},
@@ -94,7 +102,7 @@ template <> struct SqrtConfig<unsigned long fract> {
       {0x1.21b05c0ap-1ulr, 0x1.c45e023p-2ulr},
       {0x1.16becd02p-1ulr, 0x1.d624031p-2ulr},
       {0x1.0cf49fep-1ulr, 0x1.e743b844p-2ulr},
-      {0x1.04214e9cp-1ulr, 0x1.f7ce2c3cp-2ulr},
+      {0x1.038cdfcp-1ulr, 0x1.f8e6408p-2ulr},
   };
 };
 

diff  --git a/libc/test/src/stdfix/ISqrtTest.h b/libc/test/src/stdfix/ISqrtTest.h
index 405162b706a9f1..ddf292fdd083f3 100644
--- a/libc/test/src/stdfix/ISqrtTest.h
+++ b/libc/test/src/stdfix/ISqrtTest.h
@@ -27,6 +27,18 @@ template <typename T> class ISqrtTest : public LIBC_NAMESPACE::testing::Test {
 public:
   typedef OutType (*SqrtFunc)(T);
 
+  void testSpecificInput(T input, OutType result, double expected,
+                         double tolerance) {
+    double y_d = static_cast<double>(result);
+    double errors = LIBC_NAMESPACE::fputil::abs((y_d / expected) - 1.0);
+    if (errors > tolerance) {
+      // Print out the failure input and output.
+      EXPECT_EQ(input, T(0));
+      EXPECT_EQ(result, zero);
+    }
+    ASSERT_TRUE(errors <= tolerance);
+  }
+
   void testSpecialNumbers(SqrtFunc func) {
     EXPECT_EQ(zero, func(T(0)));
 
@@ -42,15 +54,9 @@ template <typename T> class ISqrtTest : public LIBC_NAMESPACE::testing::Test {
     for (int i = 0; i < COUNT; ++i) {
       x_d += 1.0;
       ++x;
-      double y_d = static_cast<double>(func(x));
-      double result = LIBC_NAMESPACE::fputil::sqrt(x_d);
-      double errors = LIBC_NAMESPACE::fputil::abs((y_d / result) - 1.0);
-      if (errors > ERR) {
-        // Print out the failure input and output.
-        EXPECT_EQ(x, T(0));
-        EXPECT_EQ(func(x), zero);
-      }
-      ASSERT_TRUE(errors <= ERR);
+      OutType result = func(x);
+      double expected = LIBC_NAMESPACE::fputil::sqrt(x_d);
+      testSpecificInput(x, result, expected, ERR);
     }
   }
 };

diff  --git a/libc/test/src/stdfix/SqrtTest.h b/libc/test/src/stdfix/SqrtTest.h
index 628be0deb770fb..47ec129fab2aed 100644
--- a/libc/test/src/stdfix/SqrtTest.h
+++ b/libc/test/src/stdfix/SqrtTest.h
@@ -42,7 +42,7 @@ template <typename T> class SqrtTest : public LIBC_NAMESPACE::testing::Test {
 
     constexpr size_t COUNT = 255;
     constexpr StorageType STEP =
-        ~StorageType(0) / static_cast<StorageType>(COUNT);
+        StorageType(~StorageType(0)) / static_cast<StorageType>(COUNT);
     constexpr double ERR = 3.0 * static_cast<double>(eps);
     StorageType x = 0;
     for (size_t i = 0; i < COUNT; ++i, x += STEP) {

diff  --git a/libc/test/src/stdfix/uksqrtui_test.cpp b/libc/test/src/stdfix/uksqrtui_test.cpp
index 0f4c057099daaf..c336d0ce1f6176 100644
--- a/libc/test/src/stdfix/uksqrtui_test.cpp
+++ b/libc/test/src/stdfix/uksqrtui_test.cpp
@@ -17,4 +17,13 @@ unsigned accum uksqrtui_fast(unsigned int x) {
 
 LIST_ISQRT_TESTS(UI, unsigned int, LIBC_NAMESPACE::uksqrtui);
 
+TEST_F(LlvmLibcISqrtUITest, LargeInteger) {
+  testSpecificInput(65529u, LIBC_NAMESPACE::uksqrtui(65529u), 0x1.fff8fep7,
+                    0x3.0p-16);
+}
+
 LIST_ISQRT_TESTS(UIFast, unsigned int, uksqrtui_fast);
+
+TEST_F(LlvmLibcISqrtUIFastTest, LargeInteger) {
+  testSpecificInput(65529u, uksqrtui_fast(65529u), 0x1.fff8fep7, 0x3.0p-16);
+}