[lldb-dev] [llvm] r189624 - Change default # of digits for APFloat::toString

Eli Friedman eli.friedman at gmail.com
Thu Sep 5 13:37:36 PDT 2013


On Wed, Sep 4, 2013 at 7:21 AM, Thirumurthi, Ashok <
ashok.thirumurthi at intel.com> wrote:

> Hi Eli,
>
> I noticed that your commit introduces a regression in the LLDB test suite
> because expression evaluation of a floating point constant like 2.234f
> results in a value like 2.23399997.  I suspect that this occurs because
> 2.234f is really just the closest number to 2.23399997 that can be
> represented using floating point precision.  I noticed that your commit
> increases the default number of digits in the precision of APFloat.  I can
> see how that's useful when performing intermediate computation, but I would
> have expected APFloat::toString to cleverly avoid reality.
>
> The attached black magic fixes the regressions in the IRInterpreter used
> for expression evaluation.  However, I'm wondering if the correct fix is to
> revert your commit (i.e. in favor of mode to select the default precision
> as nominal precision versus active precision).  Cheers,
>
>
What APFloat::toString really needs to do by default is consistently print
the least number of digits necessary to re-parse to the same value.  Using
a fixed number of digits is never going to what we want.  Unfortunately,
that patch is a lot more complicated, and I don't really have the time or
expertise to push it through.  If you're interested, I've attached my
work-in-progress, based on the Dragon4 algorithm from "How to Print
Floating-Point Numbers Accurately" by Steele and White.

-Eli
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/lldb-dev/attachments/20130905/c99728b9/attachment.html>
-------------- next part --------------
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index 676e2d4..547a694 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -3428,84 +3428,6 @@ namespace {
   void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
     Buffer.append(Str.begin(), Str.end());
   }
-
-  /// Removes data from the given significand until it is no more
-  /// precise than is required for the desired precision.
-  void AdjustToPrecision(APInt &significand,
-                         int &exp, unsigned FormatPrecision) {
-    unsigned bits = significand.getActiveBits();
-
-    // 196/59 is a very slight overestimate of lg_2(10).
-    unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
-
-    if (bits <= bitsRequired) return;
-
-    unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
-    if (!tensRemovable) return;
-
-    exp += tensRemovable;
-
-    APInt divisor(significand.getBitWidth(), 1);
-    APInt powten(significand.getBitWidth(), 10);
-    while (true) {
-      if (tensRemovable & 1)
-        divisor *= powten;
-      tensRemovable >>= 1;
-      if (!tensRemovable) break;
-      powten *= powten;
-    }
-
-    significand = significand.udiv(divisor);
-
-    // Truncate the significand down to its active bit count.
-    significand = significand.trunc(significand.getActiveBits());
-  }
-
-
-  void AdjustToPrecision(SmallVectorImpl<char> &buffer,
-                         int &exp, unsigned FormatPrecision) {
-    unsigned N = buffer.size();
-    if (N <= FormatPrecision) return;
-
-    // The most significant figures are the last ones in the buffer.
-    unsigned FirstSignificant = N - FormatPrecision;
-
-    // Round.
-    // FIXME: this probably shouldn't use 'round half up'.
-
-    // Rounding down is just a truncation, except we also want to drop
-    // trailing zeros from the new result.
-    if (buffer[FirstSignificant - 1] < '5') {
-      while (FirstSignificant < N && buffer[FirstSignificant] == '0')
-        FirstSignificant++;
-
-      exp += FirstSignificant;
-      buffer.erase(&buffer[0], &buffer[FirstSignificant]);
-      return;
-    }
-
-    // Rounding up requires a decimal add-with-carry.  If we continue
-    // the carry, the newly-introduced zeros will just be truncated.
-    for (unsigned I = FirstSignificant; I != N; ++I) {
-      if (buffer[I] == '9') {
-        FirstSignificant++;
-      } else {
-        buffer[I]++;
-        break;
-      }
-    }
-
-    // If we carried through, we have exactly one digit of precision.
-    if (FirstSignificant == N) {
-      exp += FirstSignificant;
-      buffer.clear();
-      buffer.push_back('1');
-      return;
-    }
-
-    exp += FirstSignificant;
-    buffer.erase(&buffer[0], &buffer[FirstSignificant]);
-  }
 }
 
 void APFloat::toString(SmallVectorImpl<char> &Str,
@@ -3538,96 +3460,105 @@ void APFloat::toString(SmallVectorImpl<char> &Str,
     Str.push_back('-');
 
   // Decompose the number into an APInt and an exponent.
-  int exp = exponent - ((int) semantics->precision - 1);
-  APInt significand(semantics->precision,
-                    makeArrayRef(significandParts(),
-                                 partCountForBits(semantics->precision)));
-
-  // Set FormatPrecision if zero.  We want to do this before we
-  // truncate trailing zeros, as those are part of the precision.
-  if (!FormatPrecision) {
-    // We use enough digits so the number can be round-tripped back to an
-    // APFloat. The formula comes from "How to Print Floating-Point Numbers
-    // Accurately" by Steele and White.
-    // FIXME: Using a formula based purely on the precision is conservative;
-    // we can print fewer digits depending on the actual value being printed.
-
-    // FormatPrecision = 2 + floor(significandBits / lg_2(10))
-    FormatPrecision = 2 + semantics->precision * 59 / 196;
-  }
-
-  // Ignore trailing binary zeros.
-  int trailingZeros = significand.countTrailingZeros();
-  exp += trailingZeros;
-  significand = significand.lshr(trailingZeros);
-
-  // Change the exponent from 2^e to 10^e.
-  if (exp == 0) {
-    // Nothing to do.
-  } else if (exp > 0) {
-    // Just shift left.
-    significand = significand.zext(semantics->precision + exp);
-    significand <<= exp;
-    exp = 0;
-  } else { /* exp < 0 */
-    int texp = -exp;
-
-    // We transform this using the identity:
-    //   (N)(2^-e) == (N)(5^e)(10^-e)
-    // This means we have to multiply N (the significand) by 5^e.
-    // To avoid overflow, we have to operate on numbers large
-    // enough to store N * 5^e:
-    //   log2(N * 5^e) == log2(N) + e * log2(5)
-    //                 <= semantics->precision + e * 137 / 59
-    //   (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
-
-    unsigned precision = semantics->precision + (137 * texp + 136) / 59;
-
-    // Multiply significand by 5^e.
-    //   N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
-    significand = significand.zext(precision);
-    APInt five_to_the_i(precision, 5);
-    while (true) {
-      if (texp & 1) significand *= five_to_the_i;
-
-      texp >>= 1;
-      if (!texp) break;
-      five_to_the_i *= five_to_the_i;
+  APInt f(semantics->precision,
+          makeArrayRef(significandParts(),
+                       partCountForBits(semantics->precision)));
+  int p = semantics->precision;
+  // We adjust the exponent to account for the change in
+  // floating-point format.
+  int e = exponent + 1;
+
+  unsigned BitWidth = semantics->precision * 2 + semantics->maxExponent;
+  f = f.zext(BitWidth);
+  APInt R = f.shl(std::max(e-p, 0));
+  APInt S = APInt(BitWidth, 1).shl(std::max(0, -(e - p)));
+  APInt Mminus = APInt(BitWidth, 1).shl(std::max(e-p, 0));
+  APInt Mplus = Mminus;
+  APInt Ten = APInt(BitWidth, 10);
+
+  // Fixup
+  if (f.isPowerOf2()) {
+    Mplus = Mplus.shl(1);
+    R = R.shl(1);
+    S = S.shl(1);
+  }
+  int k = 0;
+  APInt SOverTen = S.udiv(Ten);
+  while (R.ult(SOverTen)) {
+    k -= 1;
+    R *= Ten;
+    Mminus *= Ten;
+    Mplus *= Ten;
+  }
+  int CutoffPlace = -(int)FormatPrecision;
+  bool RoundUpFlag = false;
+  do {
+    while ((R.shl(1) + Mplus).uge(S.shl(1))) {
+      S *= Ten;
+      k += 1;
     }
-  }
+    if (!FormatPrecision) {
+      CutoffPlace = k;
+    } else {
+      CutoffPlace = k + CutoffPlace;
+      APInt y = S;
+      int a = CutoffPlace - k;
+      assert(a < 0);
+      for (int i = 0; i < -a; ++i) {
+        APInt rem;
+        APInt::udivrem(y, Ten, y, rem);
+        if (rem != 0)
+          ++y;
+      }
+      Mminus = APIntOps::umax(y, Mminus);
+      Mplus = APIntOps::umax(y, Mplus);
+      if (Mplus == y)
+        RoundUpFlag = true;
+    }
+  } while ((R.shl(1) + Mplus).uge(S.shl(1)));
 
-  AdjustToPrecision(significand, exp, FormatPrecision);
+  int exp = k;
 
+  bool low, high;
+  APInt U(BitWidth, 0);
   SmallVector<char, 256> buffer;
-
-  // Fill the buffer.
-  unsigned precision = significand.getBitWidth();
-  APInt ten(precision, 10);
-  APInt digit(precision, 0);
-
-  bool inTrail = true;
-  while (significand != 0) {
-    // digit <- significand % 10
-    // significand <- significand / 10
-    APInt::udivrem(significand, ten, significand, digit);
-
-    unsigned d = digit.getZExtValue();
-
-    // Drop trailing zeros.
-    if (inTrail && !d) exp++;
-    else {
-      buffer.push_back((char) ('0' + d));
-      inTrail = false;
-    }
+  do {
+    k -= 1;
+    APInt::udivrem(R * Ten, S, U, R);
+    Mminus *= Ten;
+    Mplus *= Ten;
+
+    low = R.shl(1).ult(Mminus);
+    if (RoundUpFlag)
+      high = R.shl(1).uge(S.shl(1)-Mplus);
+    else
+      high = R.shl(1).ugt(S.shl(1)-Mplus);
+    if (low || high || k == CutoffPlace)
+      break;
+    buffer.push_back('0'+U.getZExtValue());
+  } while (1);
+
+  if (low && !high) {
+    buffer.push_back('0'+U.getZExtValue());
+  } else if (high && !low) {
+    buffer.push_back('0'+U.getZExtValue() + 1);
+  } else if (R.shl(1).ult(S)) {
+    buffer.push_back('0'+U.getZExtValue());
+  } else if (R.shl(1).ugt(S)) {
+    buffer.push_back('0'+U.getZExtValue() + 1);
+  } else if (U.getZExtValue() % 2 == 0) {
+    buffer.push_back('0'+U.getZExtValue());
+  } else {
+    buffer.push_back('0'+U.getZExtValue() + 1);
   }
 
-  assert(!buffer.empty() && "no characters in buffer!");
-
-  // Drop down to FormatPrecision.
-  // TODO: don't do more precise calculations above than are required.
-  AdjustToPrecision(buffer, exp, FormatPrecision);
+  if (RoundUpFlag) {
+    while (buffer.size() < FormatPrecision)
+      buffer.push_back('0');
+  }
 
   unsigned NDigits = buffer.size();
+  exp -= NDigits;
 
   // Check whether we should use scientific notation.
   bool FormatScientific;
@@ -3658,13 +3589,12 @@ void APFloat::toString(SmallVectorImpl<char> &Str,
   if (FormatScientific) {
     exp += (NDigits - 1);
 
-    Str.push_back(buffer[NDigits-1]);
+    Str.push_back(buffer[0]);
     Str.push_back('.');
+    for (unsigned I = 1; I != NDigits; ++I)
+      Str.push_back(buffer[I]);
     if (NDigits == 1)
       Str.push_back('0');
-    else
-      for (unsigned I = 1; I != NDigits; ++I)
-        Str.push_back(buffer[NDigits-1-I]);
     Str.push_back('E');
 
     Str.push_back(exp >= 0 ? '+' : '-');
@@ -3682,7 +3612,7 @@ void APFloat::toString(SmallVectorImpl<char> &Str,
   // Non-scientific, positive exponents.
   if (exp >= 0) {
     for (unsigned I = 0; I != NDigits; ++I)
-      Str.push_back(buffer[NDigits-1-I]);
+      Str.push_back(buffer[I]);
     for (unsigned I = 0; I != (unsigned) exp; ++I)
       Str.push_back('0');
     return;
@@ -3696,7 +3626,7 @@ void APFloat::toString(SmallVectorImpl<char> &Str,
   unsigned I = 0;
   if (NWholeDigits > 0) {
     for (; I != (unsigned) NWholeDigits; ++I)
-      Str.push_back(buffer[NDigits-I-1]);
+      Str.push_back(buffer[I]);
     Str.push_back('.');
   } else {
     unsigned NZeros = 1 + (unsigned) -NWholeDigits;
@@ -3708,7 +3638,7 @@ void APFloat::toString(SmallVectorImpl<char> &Str,
   }
 
   for (; I != NDigits; ++I)
-    Str.push_back(buffer[NDigits-I-1]);
+    Str.push_back(buffer[I]);
 }
 
 bool APFloat::getExactInverse(APFloat *inv) const {
diff --git a/unittests/ADT/APFloatTest.cpp b/unittests/ADT/APFloatTest.cpp
index e57c8d4..ab4c145 100644
--- a/unittests/ADT/APFloatTest.cpp
+++ b/unittests/ADT/APFloatTest.cpp
@@ -858,19 +858,22 @@ TEST(APFloatTest, fromHexadecimalString) {
 }
 
 TEST(APFloatTest, toString) {
-  ASSERT_EQ("10", convertToString(10.0, 6, 3));
-  ASSERT_EQ("1.0E+1", convertToString(10.0, 6, 0));
+  ASSERT_EQ("1.80E+308", convertToString(1.7976931348623157E+308, 3, 3));
+  ASSERT_EQ("10.0000", convertToString(10.0, 6, 3));
+  ASSERT_EQ("1.00000E+1", convertToString(10.0, 6, 0));
   ASSERT_EQ("10100", convertToString(1.01E+4, 5, 2));
-  ASSERT_EQ("1.01E+4", convertToString(1.01E+4, 4, 2));
-  ASSERT_EQ("1.01E+4", convertToString(1.01E+4, 5, 1));
-  ASSERT_EQ("0.0101", convertToString(1.01E-2, 5, 2));
-  ASSERT_EQ("0.0101", convertToString(1.01E-2, 4, 2));
-  ASSERT_EQ("1.01E-2", convertToString(1.01E-2, 5, 1));
-  ASSERT_EQ("0.78539816339744828", convertToString(0.78539816339744830961, 0, 3));
-  ASSERT_EQ("4.9406564584124654E-324", convertToString(4.9406564584124654e-324, 0, 3));
-  ASSERT_EQ("873.18340000000001", convertToString(873.1834, 0, 1));
-  ASSERT_EQ("8.7318340000000001E+2", convertToString(873.1834, 0, 0));
+  ASSERT_EQ("1.010E+4", convertToString(1.01E+4, 4, 2));
+  ASSERT_EQ("10100", convertToString(1.01E+4, 5, 1));
+  ASSERT_EQ("0.010100", convertToString(1.01E-2, 5, 2));
+  ASSERT_EQ("0.01010", convertToString(1.01E-2, 4, 2));
+  ASSERT_EQ("1.0100E-2", convertToString(1.01E-2, 5, 1));
+  ASSERT_EQ("0.7853981633974483", convertToString(0.78539816339744830961, 0, 3));
+  ASSERT_EQ("4.0E-324", convertToString(4.9406564584124654e-324, 0, 3));
+  ASSERT_EQ("873.1834", convertToString(873.1834, 0, 1));
+  ASSERT_EQ("8.731834E+2", convertToString(873.1834, 0, 0));
   ASSERT_EQ("1.7976931348623157E+308", convertToString(1.7976931348623157E+308, 0, 0));
+  ASSERT_EQ("1.80E+308", convertToString(1.7976931348623157E+308, 3, 3));
+  ASSERT_EQ("1.7976931348623157E+308", convertToString(1.7976931348623157E+308, 20, 0));
 }
 
 TEST(APFloatTest, toInteger) {


More information about the lldb-dev mailing list