[flang-commits] [flang] ce6f030 - [flang] Tuning up binary->decimal conversion
peter klausler via flang-commits
flang-commits at lists.llvm.org
Tue Jul 14 13:23:18 PDT 2020
Author: peter klausler
Date: 2020-07-14T13:22:26-07:00
New Revision: ce6f0303df51667f42a2a63eb292ab7c0c125eea
URL: https://github.com/llvm/llvm-project/commit/ce6f0303df51667f42a2a63eb292ab7c0c125eea
DIFF: https://github.com/llvm/llvm-project/commit/ce6f0303df51667f42a2a63eb292ab7c0c125eea.diff
LOG: [flang] Tuning up binary->decimal conversion
Use short division of big-radix values by powers of two, rather than
repeated multiplication by small powers of five, when converting values
with negative unbiased exponents; this reduces the overall outer
iteration count. This change is a win across the entire range of
inputs.
Reviewed By: tskeith
Differential Revision: https://reviews.llvm.org/D83806
Added:
Modified:
flang/include/flang/Decimal/decimal.h
flang/lib/Decimal/big-radix-floating-point.h
flang/lib/Decimal/binary-to-decimal.cpp
Removed:
################################################################################
diff --git a/flang/include/flang/Decimal/decimal.h b/flang/include/flang/Decimal/decimal.h
index 0bc9deb08f4c..fa687e92d35b 100644
--- a/flang/include/flang/Decimal/decimal.h
+++ b/flang/include/flang/Decimal/decimal.h
@@ -69,7 +69,7 @@ enum DecimalConversionFlags {
* some extra due to the library working internally in base 10**16
* and computing its output size in multiples of 16.
*/
-#define EXTRA_DECIMAL_CONVERSION_SPACE (1 + 1 + 16 - 1)
+#define EXTRA_DECIMAL_CONVERSION_SPACE (1 + 1 + 2 * 16 - 1)
#ifdef __cplusplus
template <int PREC>
diff --git a/flang/lib/Decimal/big-radix-floating-point.h b/flang/lib/Decimal/big-radix-floating-point.h
index 53bd9d724914..2fbb777104d6 100644
--- a/flang/lib/Decimal/big-radix-floating-point.h
+++ b/flang/lib/Decimal/big-radix-floating-point.h
@@ -222,15 +222,46 @@ template <int PREC, int LOG10RADIX = 16> class BigRadixFloatingPointNumber {
return remainder;
}
- int DivideByPowerOfTwo(int twoPow) { // twoPow <= LOG10RADIX
- int remainder{0};
+ void DivideByPowerOfTwo(int twoPow) { // twoPow <= log10Radix
+ Digit remainder{0};
+ auto mask{(Digit{1} << twoPow) - 1};
+ auto coeff{radix >> twoPow};
for (int j{digits_ - 1}; j >= 0; --j) {
- Digit q{digit_[j] >> twoPow};
- int nrem = digit_[j] - (q << twoPow);
- digit_[j] = q + (radix >> twoPow) * remainder;
+ auto nrem{digit_[j] & mask};
+ digit_[j] = (digit_[j] >> twoPow) + coeff * remainder;
remainder = nrem;
}
- return remainder;
+ }
+
+ // Returns true on overflow
+ bool DivideByPowerOfTwoInPlace(int twoPow) {
+ if (digits_ > 0) {
+ while (twoPow > 0) {
+ int chunk{twoPow > log10Radix ? log10Radix : twoPow};
+ if ((digit_[0] & ((Digit{1} << chunk) - 1)) == 0) {
+ DivideByPowerOfTwo(chunk);
+ twoPow -= chunk;
+ continue;
+ }
+ twoPow -= chunk;
+ if (digit_[digits_ - 1] >> chunk != 0) {
+ if (digits_ == digitLimit_) {
+ return true; // overflow
+ }
+ digit_[digits_++] = 0;
+ }
+ auto remainder{digit_[digits_ - 1]};
+ exponent_ -= log10Radix;
+ auto coeff{radix >> chunk}; // precise; radix is (5*2)**log10Radix
+ auto mask{(Digit{1} << chunk) - 1};
+ for (int j{digits_ - 1}; j >= 1; --j) {
+ digit_[j] = (digit_[j - 1] >> chunk) + coeff * remainder;
+ remainder = digit_[j - 1] & mask;
+ }
+ digit_[0] = coeff * remainder;
+ }
+ }
+ return false; // no overflow
}
int AddCarry(int position = 0, int carry = 1) {
diff --git a/flang/lib/Decimal/binary-to-decimal.cpp b/flang/lib/Decimal/binary-to-decimal.cpp
index ad30b4d85403..bcc0f08558aa 100644
--- a/flang/lib/Decimal/binary-to-decimal.cpp
+++ b/flang/lib/Decimal/binary-to-decimal.cpp
@@ -70,42 +70,8 @@ BigRadixFloatingPointNumber<PREC, LOG10RADIX>::BigRadixFloatingPointNumber(
overflow |= MultiplyBy<2>();
}
- while (twoPow < 0) {
- int shift{common::TrailingZeroBitCount(digit_[0])};
- if (shift == 0) {
- break;
- }
- if (shift > log10Radix) {
- shift = log10Radix;
- }
- if (shift > -twoPow) {
- shift = -twoPow;
- }
- // (D*(2**S)) * 10.**E * 2.**twoPow -> D * 10.**E * 2.**(twoPow+S)
- DivideByPowerOfTwo(shift);
- twoPow += shift;
- }
-
- for (; twoPow <= -4; twoPow += 4) {
- // D * 10.**E * 2.**twoPow -> 625D * 10.**(E-4) * 2.**(twoPow+4)
- overflow |= MultiplyBy<(5 * 5 * 5 * 5)>();
- exponent_ -= 4;
- }
- if (twoPow <= -2) {
- // D * 10.**E * 2.**twoPow -> 25D * 10.**(E-2) * 2.**(twoPow+2)
- overflow |= MultiplyBy<5 * 5>();
- twoPow += 2;
- exponent_ -= 2;
- }
- for (; twoPow < 0; ++twoPow) {
- // D * 10.**E * 2.**twoPow -> 5D * 10.**(E-1) * 2.**(twoPow+1)
- overflow |= MultiplyBy<5>();
- --exponent_;
- }
-
+ overflow |= DivideByPowerOfTwoInPlace(-twoPow);
assert(overflow == 0);
-
- // twoPow == 0, the decimal encoding is complete.
Normalize();
}
@@ -153,7 +119,7 @@ BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToDecimal(char *buffer,
for (int k{0}; k < log10Radix; k += 2) {
Digit d{common::DivideUnsignedBy<Digit, hundredth>(dig)};
dig = 100 * (dig - d * hundredth);
- const char *q = lut + 2 * d;
+ const char *q{lut + 2 * d};
*p++ = q[0];
*p++ = q[1];
}
More information about the flang-commits mailing list