[llvm-commits] [llvm] r42912 - in /llvm/trunk: include/llvm/ADT/APFloat.h lib/Support/APFloat.cpp
Neil Booth
neil at daikokuya.co.uk
Fri Oct 12 09:02:31 PDT 2007
Author: neil
Date: Fri Oct 12 11:02:31 2007
New Revision: 42912
URL: http://llvm.org/viewvc/llvm-project?rev=42912&view=rev
Log:
Implement correctly-rounded decimal->binary conversion, i.e. conversion
from user input strings.
Such conversions are more intricate and subtle than they may appear;
it is unlikely I have got it completely right first time. I would
appreciate being informed of any bugs and incorrect roundings you
might discover.
Modified:
llvm/trunk/include/llvm/ADT/APFloat.h
llvm/trunk/lib/Support/APFloat.cpp
Modified: llvm/trunk/include/llvm/ADT/APFloat.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/APFloat.h?rev=42912&r1=42911&r2=42912&view=diff
==============================================================================
--- llvm/trunk/include/llvm/ADT/APFloat.h (original)
+++ llvm/trunk/include/llvm/ADT/APFloat.h Fri Oct 12 11:02:31 2007
@@ -60,7 +60,10 @@
if the requested precision is less than the natural precision the
output is correctly rounded for the specified rounding mode.
- Conversion to and from decimal text is not currently implemented.
+ It also reads decimal floating point numbers and correctly rounds
+ according to the specified rounding mode.
+
+ Conversion to decimal text is not currently implemented.
Non-zero finite numbers are represented internally as a sign bit,
a 16-bit signed exponent, and the significand as an array of
@@ -83,13 +86,12 @@
Some features that may or may not be worth adding:
- Conversions to and from decimal strings (hard).
+ Binary to decimal conversion (hard).
Optional ability to detect underflow tininess before rounding.
New formats: x87 in single and double precision mode (IEEE apart
- from extended exponent range) and IBM two-double extended
- precision (hard).
+ from extended exponent range) (hard).
New operations: sqrt, IEEE remainder, C90 fmod, nextafter,
nexttoward.
@@ -186,10 +188,12 @@
opStatus multiply(const APFloat &, roundingMode);
opStatus divide(const APFloat &, roundingMode);
opStatus mod(const APFloat &, roundingMode);
- void copySign(const APFloat &);
opStatus fusedMultiplyAdd(const APFloat &, const APFloat &, roundingMode);
- void changeSign(); // neg
- void clearSign(); // abs
+
+ /* Sign operations. */
+ void changeSign();
+ void clearSign();
+ void copySign(const APFloat &);
/* Conversions. */
opStatus convert(const fltSemantics &, roundingMode);
@@ -272,8 +276,12 @@
opStatus convertFromUnsignedParts(const integerPart *, unsigned int,
roundingMode);
opStatus convertFromHexadecimalString(const char *, roundingMode);
+ opStatus convertFromDecimalString (const char *, roundingMode);
char *convertNormalToHexString(char *, unsigned int, bool,
roundingMode) const;
+ opStatus roundSignificandWithExponent(const integerPart *, unsigned int,
+ int, roundingMode);
+
APInt convertFloatAPFloatToAPInt() const;
APInt convertDoubleAPFloatToAPInt() const;
APInt convertF80LongDoubleAPFloatToAPInt() const;
Modified: llvm/trunk/lib/Support/APFloat.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/APFloat.cpp?rev=42912&r1=42911&r2=42912&view=diff
==============================================================================
--- llvm/trunk/lib/Support/APFloat.cpp (original)
+++ llvm/trunk/lib/Support/APFloat.cpp Fri Oct 12 11:02:31 2007
@@ -52,6 +52,23 @@
// onto the usual format above. For now only storage of constants of
// this type is supported, no arithmetic.
const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106 };
+
+ /* A tight upper bound on number of parts required to hold the value
+ pow(5, power) is
+
+ power * 1024 / (441 * integerPartWidth) + 1
+
+ However, whilst the result may require only this many parts,
+ because we are multiplying two values to get it, the
+ multiplication may require an extra part with the excess part
+ being zero (consider the trivial case of 1 * 1, tcFullMultiply
+ requires two parts to hold the single-part result). So we add an
+ extra one to guarantee enough space whilst multiplying. */
+ const unsigned int maxExponent = 16383;
+ const unsigned int maxPrecision = 113;
+ const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
+ const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 1024)
+ / (441 * integerPartWidth));
}
/* Put a bunch of private, handy routines in an anonymous namespace. */
@@ -76,7 +93,7 @@
}
unsigned int
- hexDigitValue (unsigned int c)
+ hexDigitValue(unsigned int c)
{
unsigned int r;
@@ -239,6 +256,142 @@
return moreSignificant;
}
+ /* The error from the true value, in half-ulps, on multiplying two
+ floating point numbers, which differ from the value they
+ approximate by at most HUE1 and HUE2 half-ulps, is strictly less
+ than the returned value.
+
+ See "How to Read Floating Point Numbers Accurately" by William D
+ Clinger. */
+ unsigned int
+ HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
+ {
+ assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
+
+ if (HUerr1 + HUerr2 == 0)
+ return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
+ else
+ return inexactMultiply + 2 * (HUerr1 + HUerr2);
+ }
+
+ /* The number of ulps from the boundary (zero, or half if ISNEAREST)
+ when the least significant BITS are truncated. BITS cannot be
+ zero. */
+ integerPart
+ ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
+ {
+ unsigned int count, partBits;
+ integerPart part, boundary;
+
+ assert (bits != 0);
+
+ bits--;
+ count = bits / integerPartWidth;
+ partBits = bits % integerPartWidth + 1;
+
+ part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
+
+ if (isNearest)
+ boundary = (integerPart) 1 << (partBits - 1);
+ else
+ boundary = 0;
+
+ if (count == 0) {
+ if (part - boundary <= boundary - part)
+ return part - boundary;
+ else
+ return boundary - part;
+ }
+
+ if (part == boundary) {
+ while (--count)
+ if (parts[count])
+ return ~(integerPart) 0; /* A lot. */
+
+ return parts[0];
+ } else if (part == boundary - 1) {
+ while (--count)
+ if (~parts[count])
+ return ~(integerPart) 0; /* A lot. */
+
+ return -parts[0];
+ }
+
+ return ~(integerPart) 0; /* A lot. */
+ }
+
+ /* Place pow(5, power) in DST, and return the number of parts used.
+ DST must be at least one part larger than size of the answer. */
+ static unsigned int
+ powerOf5(integerPart *dst, unsigned int power)
+ {
+ /* A tight upper bound on number of parts required to hold the
+ value pow(5, power) is
+
+ power * 65536 / (28224 * integerPartWidth) + 1
+
+ However, whilst the result may require only N parts, because we
+ are multiplying two values to get it, the multiplication may
+ require N + 1 parts with the excess part being zero (consider
+ the trivial case of 1 * 1, the multiplier requires two parts to
+ hold the single-part result). So we add two to guarantee
+ enough space whilst multiplying. */
+ static integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
+ 15625, 78125 };
+ static integerPart pow5s[maxPowerOfFiveParts * 2 + 5] = { 78125 * 5 };
+ static unsigned int partsCount[16] = { 1 };
+
+ integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
+ unsigned int result;
+
+ assert(power <= maxExponent);
+
+ p1 = dst;
+ p2 = scratch;
+
+ *p1 = firstEightPowers[power & 7];
+ power >>= 3;
+
+ result = 1;
+ pow5 = pow5s;
+
+ for (unsigned int n = 0; power; power >>= 1, n++) {
+ unsigned int pc;
+
+ pc = partsCount[n];
+
+ /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
+ if (pc == 0) {
+ pc = partsCount[n - 1];
+ APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
+ pc *= 2;
+ if (pow5[pc - 1] == 0)
+ pc--;
+ partsCount[n] = pc;
+ }
+
+ if (power & 1) {
+ integerPart *tmp;
+
+ APInt::tcFullMultiply(p2, p1, pow5, result, pc);
+ result += pc;
+ if (p2[result - 1] == 0)
+ result--;
+
+ /* Now result is in p1 with partsCount parts and p2 is scratch
+ space. */
+ tmp = p1, p1 = p2, p2 = tmp;
+ }
+
+ pow5 += pc;
+ }
+
+ if (p1 != dst)
+ APInt::tcAssign(dst, p1, result);
+
+ return result;
+ }
+
/* Zero at the end to avoid modular arithmetic when adding one; used
when rounding up during hexadecimal output. */
static const char hexDigitsLower[] = "0123456789abcdef0";
@@ -650,6 +803,9 @@
APInt::tcShiftLeft(dividend, partsCount, bit);
}
+ /* Ensure the dividend >= divisor initially for the loop below.
+ Incidentally, this means that the division loop below is
+ guaranteed to set the integer bit to one. */
if(APInt::tcCompare(dividend, divisor, partsCount) < 0) {
exponent--;
APInt::tcShiftLeft(dividend, partsCount, 1);
@@ -865,7 +1021,7 @@
/* Keep OMSB up-to-date. */
if(omsb > (unsigned) exponentChange)
- omsb -= (unsigned) exponentChange;
+ omsb -= exponentChange;
else
omsb = 0;
}
@@ -916,7 +1072,6 @@
/* We have a non-zero denormal. */
assert(omsb < semantics->precision);
- assert(exponent == semantics->minExponent);
/* Canonicalize zeroes. */
if(omsb == 0)
@@ -1751,6 +1906,195 @@
}
APFloat::opStatus
+APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
+ unsigned sigPartCount, int exp,
+ roundingMode rounding_mode)
+{
+ unsigned int parts, pow5PartCount;
+ fltSemantics calcSemantics = { 32767, -32767, 0 };
+ integerPart pow5Parts[maxPowerOfFiveParts];
+ bool isNearest;
+
+ isNearest = (rounding_mode == rmNearestTiesToEven
+ || rounding_mode == rmNearestTiesToAway);
+
+ parts = partCountForBits(semantics->precision + 11);
+
+ /* Calculate pow(5, abs(exp)). */
+ pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
+
+ for (;; parts *= 2) {
+ opStatus sigStatus, powStatus;
+ unsigned int excessPrecision, truncatedBits;
+
+ calcSemantics.precision = parts * integerPartWidth - 1;
+ excessPrecision = calcSemantics.precision - semantics->precision;
+ truncatedBits = excessPrecision;
+
+ APFloat decSig(calcSemantics, fcZero, sign);
+ APFloat pow5(calcSemantics, fcZero, false);
+
+ sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
+ rmNearestTiesToEven);
+ powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
+ rmNearestTiesToEven);
+ /* Add exp, as 10^n = 5^n * 2^n. */
+ decSig.exponent += exp;
+
+ lostFraction calcLostFraction;
+ integerPart HUerr, HUdistance, powHUerr;
+
+ if (exp >= 0) {
+ /* multiplySignificand leaves the precision-th bit set to 1. */
+ calcLostFraction = decSig.multiplySignificand(pow5, NULL);
+ powHUerr = powStatus != opOK;
+ } else {
+ calcLostFraction = decSig.divideSignificand(pow5);
+ /* Denormal numbers have less precision. */
+ if (decSig.exponent < semantics->minExponent) {
+ excessPrecision += (semantics->minExponent - decSig.exponent);
+ truncatedBits = excessPrecision;
+ if (excessPrecision > calcSemantics.precision)
+ excessPrecision = calcSemantics.precision;
+ }
+ /* Extra half-ulp lost in reciprocal of exponent. */
+ powHUerr = 1 + powStatus != opOK;
+ }
+
+ /* Both multiplySignificand and divideSignificand return the
+ result with the integer bit set. */
+ assert (APInt::tcExtractBit
+ (decSig.significandParts(), calcSemantics.precision - 1) == 1);
+
+ HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
+ powHUerr);
+ HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
+ excessPrecision, isNearest);
+
+ /* Are we guaranteed to round correctly if we truncate? */
+ if (HUdistance >= HUerr) {
+ APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
+ calcSemantics.precision - excessPrecision,
+ excessPrecision);
+ /* Take the exponent of decSig. If we tcExtract-ed less bits
+ above we must adjust our exponent to compensate for the
+ implicit right shift. */
+ exponent = (decSig.exponent + semantics->precision
+ - (calcSemantics.precision - excessPrecision));
+ calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
+ decSig.partCount(),
+ truncatedBits);
+ return normalize(rounding_mode, calcLostFraction);
+ }
+ }
+}
+
+APFloat::opStatus
+APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode)
+{
+ const char *dot, *firstSignificantDigit;
+ integerPart val, maxVal, decValue;
+ opStatus fs;
+
+ /* Skip leading zeroes and any decimal point. */
+ p = skipLeadingZeroesAndAnyDot(p, &dot);
+ firstSignificantDigit = p;
+
+ /* The maximum number that can be multiplied by ten with any digit
+ added without overflowing an integerPart. */
+ maxVal = (~ (integerPart) 0 - 9) / 10;
+
+ val = 0;
+ while (val <= maxVal) {
+ if (*p == '.') {
+ assert(dot == 0);
+ dot = p++;
+ }
+
+ decValue = digitValue(*p);
+ if (decValue == -1U)
+ break;
+ p++;
+ val = val * 10 + decValue;
+ }
+
+ integerPart *decSignificand;
+ unsigned int partCount, maxPartCount;
+
+ partCount = 0;
+ maxPartCount = 4;
+ decSignificand = new integerPart[maxPartCount];
+ decSignificand[partCount++] = val;
+
+ /* Now continue to do single-part arithmetic for as long as we can.
+ Then do a part multiplication, and repeat. */
+ while (decValue != -1U) {
+ integerPart multiplier;
+
+ val = 0;
+ multiplier = 1;
+
+ while (multiplier <= maxVal) {
+ if (*p == '.') {
+ assert(dot == 0);
+ dot = p++;
+ }
+
+ decValue = digitValue(*p);
+ if (decValue == -1U)
+ break;
+ p++;
+ multiplier *= 10;
+ val = val * 10 + decValue;
+ }
+
+ if (partCount == maxPartCount) {
+ integerPart *newDecSignificand;
+ newDecSignificand = new integerPart[maxPartCount = partCount * 2];
+ APInt::tcAssign(newDecSignificand, decSignificand, partCount);
+ delete [] decSignificand;
+ decSignificand = newDecSignificand;
+ }
+
+ APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
+ partCount, partCount + 1, false);
+
+ /* If we used another part (likely), increase the count. */
+ if (decSignificand[partCount] != 0)
+ partCount++;
+ }
+
+ /* Now decSignificand contains the supplied significand ignoring the
+ decimal point. Figure out our effective exponent, which is the
+ specified exponent adjusted for any decimal point. */
+
+ if (p == firstSignificantDigit) {
+ /* Ignore the exponent if we are zero - we cannot overflow. */
+ category = fcZero;
+ fs = opOK;
+ } else {
+ int decimalExponent;
+
+ if (dot)
+ decimalExponent = dot + 1 - p;
+ else
+ decimalExponent = 0;
+
+ /* Add the given exponent. */
+ if (*p == 'e' || *p == 'E')
+ decimalExponent = totalExponent(p, decimalExponent);
+
+ category = fcNormal;
+ fs = roundSignificandWithExponent(decSignificand, partCount,
+ decimalExponent, rounding_mode);
+ }
+
+ delete [] decSignificand;
+
+ return fs;
+}
+
+APFloat::opStatus
APFloat::convertFromString(const char *p, roundingMode rounding_mode)
{
assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
@@ -1763,9 +2107,8 @@
if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X'))
return convertFromHexadecimalString(p + 2, rounding_mode);
-
- assert(0 && "Decimal to binary conversions not yet implemented");
- abort();
+ else
+ return convertFromDecimalString(p, rounding_mode);
}
/* Write out a hexadecimal representation of the floating point value
More information about the llvm-commits
mailing list