[flang-commits] [flang] ce6f030 - [flang] Tuning up binary->decimal conversion

Tue Jul 14 13:23:18 PDT 2020

Author: peter klausler
Date: 2020-07-14T13:22:26-07:00
New Revision: ce6f0303df51667f42a2a63eb292ab7c0c125eea

URL: https://github.com/llvm/llvm-project/commit/ce6f0303df51667f42a2a63eb292ab7c0c125eea
DIFF: https://github.com/llvm/llvm-project/commit/ce6f0303df51667f42a2a63eb292ab7c0c125eea.diff

LOG: [flang] Tuning up binary->decimal conversion

Use short division of big-radix values by powers of two, rather than
multiplication by smaller powers of five, when converting values with
negative unbiased exponents; this reduces the overall outer iteration
count. This change is a win across the entire range of inputs.

Reviewed By: tskeith

Differential Revision: https://reviews.llvm.org/D83806

Added: 
    

Modified: 
    flang/include/flang/Decimal/decimal.h
    flang/lib/Decimal/big-radix-floating-point.h
    flang/lib/Decimal/binary-to-decimal.cpp

Removed: 
    


################################################################################
diff  --git a/flang/include/flang/Decimal/decimal.h b/flang/include/flang/Decimal/decimal.h
index 0bc9deb08f4c..fa687e92d35b 100644

--- a/flang/include/flang/Decimal/decimal.h
+++ b/flang/include/flang/Decimal/decimal.h
@@ -69,7 +69,7 @@ enum DecimalConversionFlags {
  * some extra due to the library working internally in base 10**16
  * and computing its output size in multiples of 16.
  */
-#define EXTRA_DECIMAL_CONVERSION_SPACE (1 + 1 + 16 - 1)
+#define EXTRA_DECIMAL_CONVERSION_SPACE (1 + 1 + 2 * 16 - 1)
 
 #ifdef __cplusplus
 template <int PREC>

diff  --git a/flang/lib/Decimal/big-radix-floating-point.h b/flang/lib/Decimal/big-radix-floating-point.h
index 53bd9d724914..2fbb777104d6 100644
--- a/flang/lib/Decimal/big-radix-floating-point.h
+++ b/flang/lib/Decimal/big-radix-floating-point.h
@@ -222,15 +222,46 @@ template <int PREC, int LOG10RADIX = 16> class BigRadixFloatingPointNumber {
     return remainder;
   }
 
-  int DivideByPowerOfTwo(int twoPow) { // twoPow <= LOG10RADIX
-    int remainder{0};
+  void DivideByPowerOfTwo(int twoPow) { // twoPow <= log10Radix
+    Digit remainder{0};
+    auto mask{(Digit{1} << twoPow) - 1};
+    auto coeff{radix >> twoPow};
     for (int j{digits_ - 1}; j >= 0; --j) {
-      Digit q{digit_[j] >> twoPow};
-      int nrem = digit_[j] - (q << twoPow);
-      digit_[j] = q + (radix >> twoPow) * remainder;
+      auto nrem{digit_[j] & mask};
+      digit_[j] = (digit_[j] >> twoPow) + coeff * remainder;
       remainder = nrem;
     }
-    return remainder;
+  }
+
+  // Returns true on overflow
+  bool DivideByPowerOfTwoInPlace(int twoPow) {
+    if (digits_ > 0) {
+      while (twoPow > 0) {
+        int chunk{twoPow > log10Radix ? log10Radix : twoPow};
+        if ((digit_[0] & ((Digit{1} << chunk) - 1)) == 0) {
+          DivideByPowerOfTwo(chunk);
+          twoPow -= chunk;
+          continue;
+        }
+        twoPow -= chunk;
+        if (digit_[digits_ - 1] >> chunk != 0) {
+          if (digits_ == digitLimit_) {
+            return true; // overflow
+          }
+          digit_[digits_++] = 0;
+        }
+        auto remainder{digit_[digits_ - 1]};
+        exponent_ -= log10Radix;
+        auto coeff{radix >> chunk}; // precise; radix is (5*2)**log10Radix
+        auto mask{(Digit{1} << chunk) - 1};
+        for (int j{digits_ - 1}; j >= 1; --j) {
+          digit_[j] = (digit_[j - 1] >> chunk) + coeff * remainder;
+          remainder = digit_[j - 1] & mask;
+        }
+        digit_[0] = coeff * remainder;
+      }
+    }
+    return false; // no overflow
   }
 
   int AddCarry(int position = 0, int carry = 1) {

diff  --git a/flang/lib/Decimal/binary-to-decimal.cpp b/flang/lib/Decimal/binary-to-decimal.cpp
index ad30b4d85403..bcc0f08558aa 100644
--- a/flang/lib/Decimal/binary-to-decimal.cpp
+++ b/flang/lib/Decimal/binary-to-decimal.cpp
@@ -70,42 +70,8 @@ BigRadixFloatingPointNumber<PREC, LOG10RADIX>::BigRadixFloatingPointNumber(
     overflow |= MultiplyBy<2>();
   }
 
-  while (twoPow < 0) {
-    int shift{common::TrailingZeroBitCount(digit_[0])};
-    if (shift == 0) {
-      break;
-    }
-    if (shift > log10Radix) {
-      shift = log10Radix;
-    }
-    if (shift > -twoPow) {
-      shift = -twoPow;
-    }
-    // (D*(2**S)) * 10.**E * 2.**twoPow -> D * 10.**E * 2.**(twoPow+S)
-    DivideByPowerOfTwo(shift);
-    twoPow += shift;
-  }
-
-  for (; twoPow <= -4; twoPow += 4) {
-    // D * 10.**E * 2.**twoPow -> 625D * 10.**(E-4) * 2.**(twoPow+4)
-    overflow |= MultiplyBy<(5 * 5 * 5 * 5)>();
-    exponent_ -= 4;
-  }
-  if (twoPow <= -2) {
-    // D * 10.**E * 2.**twoPow -> 25D * 10.**(E-2) * 2.**(twoPow+2)
-    overflow |= MultiplyBy<5 * 5>();
-    twoPow += 2;
-    exponent_ -= 2;
-  }
-  for (; twoPow < 0; ++twoPow) {
-    // D * 10.**E * 2.**twoPow -> 5D * 10.**(E-1) * 2.**(twoPow+1)
-    overflow |= MultiplyBy<5>();
-    --exponent_;
-  }
-
+  overflow |= DivideByPowerOfTwoInPlace(-twoPow);
   assert(overflow == 0);
-
-  // twoPow == 0, the decimal encoding is complete.
   Normalize();
 }
 
@@ -153,7 +119,7 @@ BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToDecimal(char *buffer,
     for (int k{0}; k < log10Radix; k += 2) {
       Digit d{common::DivideUnsignedBy<Digit, hundredth>(dig)};
       dig = 100 * (dig - d * hundredth);
-      const char *q = lut + 2 * d;
+      const char *q{lut + 2 * d};
       *p++ = q[0];
       *p++ = q[1];
     }