[llvm-commits] CVS: llvm-test/SingleSource/Benchmarks/CoyoteBench/LICENSE.txt Makefile almabench.c fftbench.cpp huffbench.c lpbench.c

Chris Lattner lattner at cs.uiuc.edu
Sat Mar 4 14:35:30 PST 2006



Changes in directory llvm-test/SingleSource/Benchmarks/CoyoteBench:

LICENSE.txt added (r1.1)
Makefile added (r1.1)
almabench.c added (r1.1)
fftbench.cpp added (r1.1)
huffbench.c added (r1.1)
lpbench.c added (r1.1)
---
Log message:

add an initial version of this benchmark to the testsuite.  This may be updated
in the future if I get a new version from the author


---
Diffs of the changes:  (+1981 -0)

 LICENSE.txt  |    3 
 Makefile     |    5 
 almabench.c  |  378 +++++++++++++++++++++++++++++++
 fftbench.cpp |  699 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 huffbench.c  |  488 +++++++++++++++++++++++++++++++++++++++++
 lpbench.c    |  408 ++++++++++++++++++++++++++++++++++
 6 files changed, 1981 insertions(+)


Index: llvm-test/SingleSource/Benchmarks/CoyoteBench/LICENSE.txt
diff -c /dev/null llvm-test/SingleSource/Benchmarks/CoyoteBench/LICENSE.txt:1.1
*** /dev/null	Sat Mar  4 16:35:28 2006
--- llvm-test/SingleSource/Benchmarks/CoyoteBench/LICENSE.txt	Sat Mar  4 16:35:18 2006
***************
*** 0 ****
--- 1,3 ----
+     Written by Scott Robert Ladd.
+     No rights reserved. This is public domain software, for use by anyone.
+ 


Index: llvm-test/SingleSource/Benchmarks/CoyoteBench/Makefile
diff -c /dev/null llvm-test/SingleSource/Benchmarks/CoyoteBench/Makefile:1.1
*** /dev/null	Sat Mar  4 16:35:30 2006
--- llvm-test/SingleSource/Benchmarks/CoyoteBench/Makefile	Sat Mar  4 16:35:18 2006
***************
*** 0 ****
--- 1,5 ----
+ LEVEL = ../../..
+ LDFLAGS += -lm
+ 
+ include $(LEVEL)/SingleSource/Makefile.singlesrc
+ 


Index: llvm-test/SingleSource/Benchmarks/CoyoteBench/almabench.c
diff -c /dev/null llvm-test/SingleSource/Benchmarks/CoyoteBench/almabench.c:1.1
*** /dev/null	Sat Mar  4 16:35:30 2006
--- llvm-test/SingleSource/Benchmarks/CoyoteBench/almabench.c	Sat Mar  4 16:35:18 2006
***************
*** 0 ****
--- 1,378 ----
+ /*
+     almabench
+     Standard C version
+     17 April 2003
+ 
+     Written by Scott Robert Ladd.
+     No rights reserved. This is public domain software, for use by anyone.
+ 
+     A number-crunching benchmark that can be used as a fitness test for
+     evolving optimal compiler options via genetic algorithm.
+ 
+     This program calculates the daily ephemeris (at noon) for the years
+     2000-2099 using an algorithm developed by J.L. Simon, P. Bretagnon, J.
+     Chapront, M. Chapront-Touze, G. Francou and J. Laskar of the Bureau des
+     Longitudes, Paris, France), as detailed in Astronomy & Astrophysics
+     282, 663 (1994)
+ 
+     Note that the code herein is design for the purpose of testing 
+     computational performance; error handling and other such "niceties"
+     is virtually non-existent.
+ 
+     Actual benchmark results can be found at:
+             http://www.coyotegulch.com
+ 
+     Please do not use this information or algorithm in any way that might
+     upset the balance of the universe or otherwise cause planets to impact
+     upon one another.
+ */
+ 
+ #include <math.h>
+ #include <time.h>
+ #include <string.h>
+ #include <stdio.h>
+ #include <stdbool.h>
+ 
+ #define CALC_PI 3.14159265358979323846
+ 
+ static const double PI        = CALC_PI;
+ static const double J2000     = 2451545.0;
+ static const double JCENTURY  = 36525.0;
+ static const double JMILLENIA = 365250.0;
+ static const double TWOPI     = 2.0 * CALC_PI;
+ static const double A2R       = CALC_PI / 648000.0;
+ static const double R2H       = 12.0 / CALC_PI;
+ static const double R2D       = 180.0 / CALC_PI;
+ static const double GAUSSK    = 0.01720209895;
+ 
+ // number of days to include in test
+ #if defined(ACOVEA)
+ #if defined(arch_pentium4)
+ static const int TEST_LOOPS  =     4;
+ #else
+ static const int TEST_LOOPS  =     1;
+ #endif
+ #else
+ static const int TEST_LOOPS  =    20;
+ #endif
+ 
+ static const int TEST_LENGTH = 36525;
+ 
+ // sin and cos of j2000 mean obliquity (iau 1976)
+ static const double sineps = 0.3977771559319137;
+ static const double coseps = 0.9174820620691818;
+ 
+ static const double amas [8] = { 6023600.0, 408523.5, 328900.5, 3098710.0, 1047.355, 3498.5, 22869.0, 19314.0 };
+ 
+ // tables giving the mean keplerian elements, limited to t**2 terms:
+ //        a       semi-major axis (au)
+ //        dlm     mean longitude (degree and arcsecond)
+ //        e       eccentricity
+ //        pi      longitude of the perihelion (degree and arcsecond)
+ //        dinc    inclination (degree and arcsecond)
+ //        omega   longitude of the ascending node (degree and arcsecond)
+ static const double a [8][3] =
+     { {   0.3870983098,             0,        0 },
+       {   0.7233298200,             0,        0 },
+       {   1.0000010178,             0,        0 },
+       {   1.5236793419,         3e-10,        0 },
+       {   5.2026032092,     19132e-10,  -39e-10 },
+       {   9.5549091915, -0.0000213896,  444e-10 },
+       {  19.2184460618,     -3716e-10,  979e-10 },
+       {  30.1103868694,    -16635e-10,  686e-10 } };
+        
+ static const double dlm[8][3] = 
+     { { 252.25090552, 5381016286.88982,  -1.92789 },
+       { 181.97980085, 2106641364.33548,   0.59381 },
+       { 100.46645683, 1295977422.83429,  -2.04411 },
+       { 355.43299958,  689050774.93988,   0.94264 },
+       {  34.35151874,  109256603.77991, -30.60378 },
+       {  50.07744430,   43996098.55732,  75.61614 },
+       { 314.05500511,   15424811.93933,  -1.75083 },
+       { 304.34866548,    7865503.20744,   0.21103 } };
+ 
+ static const double e[8][3] =
+     { {   0.2056317526,  0.0002040653,    -28349e-10 },
+       {   0.0067719164, -0.0004776521,     98127e-10 },
+       {   0.0167086342, -0.0004203654, -0.0000126734 },
+       {   0.0934006477,  0.0009048438,    -80641e-10 },
+       {   0.0484979255,  0.0016322542, -0.0000471366 },
+       {   0.0555481426, -0.0034664062, -0.0000643639 },
+       {   0.0463812221, -0.0002729293,  0.0000078913 },
+       {   0.0094557470,  0.0000603263,            0  } };
+ 
+ static const double pi[8][3] =
+     { {  77.45611904,  5719.11590,   -4.83016 },
+       { 131.56370300,   175.48640, -498.48184 },
+       { 102.93734808, 11612.35290,   53.27577 },
+       { 336.06023395, 15980.45908,  -62.32800 },
+       {  14.33120687,  7758.75163,  259.95938 },
+       {  93.05723748, 20395.49439,  190.25952 },
+       { 173.00529106,  3215.56238,  -34.09288 },
+       {  48.12027554,  1050.71912,   27.39717 } };
+ 
+ static const double dinc[8][3] =
+     { {   7.00498625, -214.25629,   0.28977 },
+       {   3.39466189,  -30.84437, -11.67836 },
+       {            0,  469.97289,  -3.35053 },
+       {   1.84972648, -293.31722,  -8.11830 },
+       {   1.30326698,  -71.55890,  11.95297 },
+       {   2.48887878,   91.85195, -17.66225 },
+       {   0.77319689,  -60.72723,   1.25759 },
+       {   1.76995259,    8.12333,   0.08135 } };
+ 
+ static const double omega[8][3] =
+     { {  48.33089304,  -4515.21727,  -31.79892 },
+       {  76.67992019, -10008.48154,  -51.32614 },
+       { 174.87317577,  -8679.27034,   15.34191 },
+       {  49.55809321, -10620.90088, -230.57416 },
+       { 100.46440702,   6362.03561,  326.52178 },
+       { 113.66550252,  -9240.19942,  -66.23743 },
+       {  74.00595701,   2669.15033,  145.93964 },
+       { 131.78405702,   -221.94322,   -0.78728 } };
+ 
+ // tables for trigonometric terms to be added to the mean elements
+ // of the semi-major axes.
+ static const double kp[8][9] =
+     { { 69613.0,  75645.0, 88306.0, 59899.0, 15746.0, 71087.0, 142173.0,  3086.0,    0.0 },
+       { 21863.0,  32794.0, 26934.0, 10931.0, 26250.0, 43725.0,  53867.0, 28939.0,    0.0 },
+       { 16002.0,  21863.0, 32004.0, 10931.0, 14529.0, 16368.0,  15318.0, 32794.0,    0.0 },
+       {  6345.0,   7818.0, 15636.0,  7077.0,  8184.0, 14163.0,   1107.0,  4872.0,    0.0 },
+       {  1760.0,   1454.0,  1167.0,   880.0,   287.0,  2640.0,     19.0,  2047.0, 1454.0 },
+       {   574.0,      0.0,   880.0,   287.0,    19.0,  1760.0,   1167.0,   306.0,  574.0 },
+       {   204.0,      0.0,   177.0,  1265.0,     4.0,   385.0,    200.0,   208.0,  204.0 },
+       {     0.0,    102.0,   106.0,     4.0,    98.0,  1367.0,    487.0,   204.0,    0.0 } };
+ 
+ static const double ca[8][9] =
+     { {       4.0,    -13.0,    11.0,    -9.0,    -9.0,    -3.0,    -1.0,     4.0,    0.0 },
+       {    -156.0,     59.0,   -42.0,     6.0,    19.0,   -20.0,   -10.0,   -12.0,    0.0 },
+       {      64.0,   -152.0,    62.0,    -8.0,    32.0,   -41.0,    19.0,   -11.0,    0.0 },
+       {     124.0,    621.0,  -145.0,   208.0,    54.0,   -57.0,    30.0,    15.0,    0.0 },
+       {  -23437.0,  -2634.0,  6601.0,  6259.0, -1507.0, -1821.0,  2620.0, -2115.0,-1489.0 },
+       {   62911.0,-119919.0, 79336.0, 17814.0,-24241.0, 12068.0,  8306.0, -4893.0, 8902.0 },
+       {  389061.0,-262125.0,-44088.0,  8387.0,-22976.0, -2093.0,  -615.0, -9720.0, 6633.0 },
+       { -412235.0,-157046.0,-31430.0, 37817.0, -9740.0,   -13.0, -7449.0,  9644.0,    0.0 } };
+ 
+ static const double sa[8][9] =
+     { {     -29.0,    -1.0,     9.0,     6.0,    -6.0,     5.0,     4.0,     0.0,    0.0 },
+       {     -48.0,  -125.0,   -26.0,   -37.0,    18.0,   -13.0,   -20.0,    -2.0,    0.0 },
+       {    -150.0,   -46.0,    68.0,    54.0,    14.0,    24.0,   -28.0,    22.0,    0.0 },
+       {    -621.0,   532.0,  -694.0,   -20.0,   192.0,   -94.0,    71.0,   -73.0,    0.0 },
+       {  -14614.0,-19828.0, -5869.0,  1881.0, -4372.0, -2255.0,   782.0,   930.0,  913.0 },
+       {  139737.0,     0.0, 24667.0, 51123.0, -5102.0,  7429.0, -4095.0, -1976.0,-9566.0 },
+       { -138081.0,     0.0, 37205.0,-49039.0,-41901.0,-33872.0,-27037.0,-12474.0,18797.0 },
+       {       0.0, 28492.0,133236.0, 69654.0, 52322.0,-49577.0,-26430.0, -3593.0,    0.0 } };
+ 
+ // tables giving the trigonometric terms to be added to the mean
+ // elements of the mean longitudes.
+ static const double kq[8][10] =
+     { {  3086.0, 15746.0, 69613.0, 59899.0, 75645.0, 88306.0, 12661.0, 2658.0,  0.0,   0.0 },
+       { 21863.0, 32794.0, 10931.0,    73.0,  4387.0, 26934.0,  1473.0, 2157.0,  0.0,   0.0 },
+       {    10.0, 16002.0, 21863.0, 10931.0,  1473.0, 32004.0,  4387.0,   73.0,  0.0,   0.0 },
+       {    10.0,  6345.0,  7818.0,  1107.0, 15636.0,  7077.0,  8184.0,  532.0, 10.0,   0.0 },
+       {    19.0,  1760.0,  1454.0,   287.0,  1167.0,   880.0,   574.0, 2640.0, 19.0,1454.0 },
+       {    19.0,   574.0,   287.0,   306.0,  1760.0,    12.0,    31.0,   38.0, 19.0, 574.0 },
+       {     4.0,   204.0,   177.0,     8.0,    31.0,   200.0,  1265.0,  102.0,  4.0, 204.0 },
+       {     4.0,   102.0,   106.0,     8.0,    98.0,  1367.0,   487.0,  204.0,  4.0, 102.0 } };
+ 
+ static const double cl[8][10] =
+     { {      21.0,   -95.0, -157.0,   41.0,   -5.0,   42.0,   23.0,   30.0,     0.0,    0.0 },
+       {    -160.0,  -313.0, -235.0,   60.0,  -74.0,  -76.0,  -27.0,   34.0,     0.0,    0.0 },
+       {    -325.0,  -322.0,  -79.0,  232.0,  -52.0,   97.0,   55.0,  -41.0,     0.0,    0.0 },
+       {    2268.0,  -979.0,  802.0,  602.0, -668.0,  -33.0,  345.0,  201.0,   -55.0,    0.0 },
+       {    7610.0, -4997.0,-7689.0,-5841.0,-2617.0, 1115.0, -748.0, -607.0,  6074.0,  354.0 },
+       {  -18549.0, 30125.0,20012.0, -730.0,  824.0,   23.0, 1289.0, -352.0,-14767.0,-2062.0 },
+       { -135245.0,-14594.0, 4197.0,-4030.0,-5630.0,-2898.0, 2540.0, -306.0,  2939.0, 1986.0 },
+       {   89948.0,  2103.0, 8963.0, 2695.0, 3682.0, 1648.0,  866.0, -154.0, -1963.0, -283.0 } };
+ 
+ static const double sl[8][10] =
+     { {   -342.0,   136.0,  -23.0,   62.0,   66.0,  -52.0,  -33.0,   17.0,     0.0,    0.0 },
+       {    524.0,  -149.0,  -35.0,  117.0,  151.0,  122.0,  -71.0,  -62.0,     0.0,    0.0 },
+       {   -105.0,  -137.0,  258.0,   35.0, -116.0,  -88.0, -112.0,  -80.0,     0.0,    0.0 },
+       {    854.0,  -205.0, -936.0, -240.0,  140.0, -341.0,  -97.0, -232.0,   536.0,    0.0 },
+       { -56980.0,  8016.0, 1012.0, 1448.0,-3024.0,-3710.0,  318.0,  503.0,  3767.0,  577.0 },
+       { 138606.0,-13478.0,-4964.0, 1441.0,-1319.0,-1482.0,  427.0, 1236.0, -9167.0,-1918.0 },
+       {  71234.0,-41116.0, 5334.0,-4935.0,-1848.0,   66.0,  434.0,-1748.0,  3780.0, -701.0 },
+       { -47645.0, 11647.0, 2166.0, 3194.0,  679.0,    0.0, -244.0, -419.0, -2531.0,   48.0 } };
+ 
+ //---------------------------------------------------------------------------
+ // Normalize angle into the range -pi <= A < +pi.
+ double anpm (double a)
+ {
+     double w = fmod(a,TWOPI);
+     
+     if (fabs(w) >= PI) 
+         w = w - ((a < 0) ? -TWOPI : TWOPI);
+         
+     return w;
+ }
+ 
+ //---------------------------------------------------------------------------    
+ // The reference frame is equatorial and is with respect to the
+ //    mean equator and equinox of epoch j2000.
+ void planetpv (double epoch[2], int np, double pv[2][3])
+ {
+     // working storage
+     int i, j, k;
+     double t, da, dl, de, dp, di, doh, dmu, arga, argl, am;
+     double ae, dae, ae2, at, r, v, si2, xq, xp, tl, xsw;
+     double xcw, xm2, xf, ci2, xms, xmc, xpxq2, x, y, z;
+ 
+     // time: julian millennia since j2000.
+     t = ((epoch[0] - J2000) + epoch[1]) / JMILLENIA;
+ 
+     // compute the mean elements.
+     da  = a[np][0] + (a[np][1] + a[np][2] * t ) * t;
+     dl  = (3600.0 * dlm[np][0] + (dlm[np][1] + dlm[np][2] * t ) * t ) * A2R;
+     de  = e[np][0] + (e[np][1] + e[np][2] * t ) * t;
+     dp  = anpm((3600.0 * pi[np][0] + (pi[np][1] + pi[np][2] * t ) * t ) * A2R );
+     di  = (3600.0 * dinc[np][0] + (dinc[np][1] + dinc[np][2] * t ) * t ) * A2R;
+     doh = anpm((3600.0 * omega[np][0] + (omega[np][1] + omega[np][2] * t ) * t ) * A2R );
+     
+     // apply the trigonometric terms.
+     dmu = 0.35953620 * t;
+     
+     for (k = 0; k < 8; ++k)
+     {
+         arga = kp[np][k] * dmu;
+         argl = kq[np][k] * dmu;
+         da   = da + (ca[np][k] * cos(arga) + sa[np][k] * sin(arga)) * 0.0000001;
+         dl   = dl + (cl[np][k] * cos(argl) + sl[np][k] * sin(argl)) * 0.0000001;
+     }
+ 
+     arga = kp[np][8] * dmu;
+     da   = da + t * (ca[np][8] * cos(arga) + sa[np][8] * sin(arga)) * 0.0000001;
+ 
+     for (k = 8; k <= 9; ++k)
+     {
+         argl = kq[np][k] * dmu;
+         dl   = dl + t * ( cl[np][k] * cos(argl) + sl[np][k] * sin(argl) ) * 0.0000001;
+     }
+ 
+     dl = fmod(dl,TWOPI);
+ 
+     // iterative solution of kepler's equation to get eccentric anomaly.
+     am = dl - dp;
+     ae = am + de * sin(am);
+     k  = 0;
+ 
+     while (1)
+     {
+         dae = (am - ae + de * sin(ae)) / (1.0 - de * cos(ae));
+         ae  = ae + dae;
+         k   = k + 1;
+     
+         if ((k >= 10) || (fabs(dae) < 1e-12))
+             break;
+     }
+ 
+     // true anomaly.
+     ae2 = ae / 2.0;
+     at  = 2.0 * atan2(sqrt((1.0 + de)/(1.0 - de)) * sin(ae2), cos(ae2));
+ 
+     // distance (au) and speed (radians per day).
+     r = da * (1.0 - de * cos(ae));
+     v = GAUSSK * sqrt((1.0 + 1.0 / amas[np] ) / (da * da * da));
+ 
+     si2   = sin(di / 2.0);
+     xq    = si2 * cos(doh);
+     xp    = si2 * sin(doh);
+     tl    = at + dp;
+     xsw   = sin(tl);
+     xcw   = cos(tl);
+     xm2   = 2.0 * (xp * xcw - xq * xsw );
+     xf    = da / sqrt(1.0 - de*de);
+     ci2   = cos(di / 2.0);
+     xms   = (de * sin(dp) + xsw) * xf;
+     xmc   = (de * cos(dp) + xcw) * xf;
+     xpxq2 = 2.0 * xp * xq;
+ 
+     // position (j2000 ecliptic x,y,z in au).
+     x = r * (xcw - xm2 * xp);
+     y = r * (xsw + xm2 * xq);
+     z = r * (-xm2 * ci2);
+ 
+     // rotate to equatorial.
+     pv[0][0] = x;
+     pv[0][1] = y * coseps - z * sineps;
+     pv[0][2] = y * sineps + z * coseps;
+ 
+     // velocity (j2000 ecliptic xdot,ydot,zdot in au/d).
+     x = v * ((-1.0 + 2.0 * xp * xp) * xms + xpxq2 * xmc);
+     y = v * (( 1.0 - 2.0 * xq * xq ) * xmc - xpxq2 * xms);
+     z = v * (2.0 * ci2 * (xp * xms + xq * xmc));
+ 
+     // rotate to equatorial.
+     pv[1][0] = x;
+     pv[1][1] = y * coseps - z * sineps;
+     pv[1][2] = y * sineps + z * coseps;
+ }
+ 
+ //---------------------------------------------------------------------------
+ // Computes RA, Declination, and distance from a state vector returned by
+ // planetpv.
+ void radecdist(double state[2][3], double rdd[3])
+ {
+     // distance
+     rdd[2] = sqrt(state[0][0] * state[0][0] + state[0][1] * state[0][1] + state[0][2] * state[0][2]);
+ 
+     // RA
+     rdd[0] = atan2(state[0][1], state[0][0]) * R2H;
+     if (rdd[0] < 0.0) rdd[0] += 24.0;
+ 
+     // Declination
+     rdd[1] = asin(state[0][2] / rdd[2]) * R2D;
+ }
+ 
+ //---------------------------------------------------------------------------
+ // Entry point
+ // Calculate RA and Dec for noon on every day in 1900-2100
+ int main(int argc, char ** argv)
+ {
+     int i, n, p;
+     double jd[2];
+     double pv[2][3];
+     double position[3];
+     bool   ga_testing = false;
+     
+     // do we have verbose output?
+     if (argc > 1)
+     {
+         for (i = 1; i < argc; ++i)
+         {
+             if (!strcmp(argv[1],"-ga"))
+             {
+                 ga_testing = true;
+                 break;
+             }
+         }
+     }
+     
+     // get starting time    
+     
+     // main loop
+     for (i = 0; i < TEST_LOOPS; ++i)
+     {
+         jd[0] = J2000;
+         jd[1] = 0.0;
+ 
+         for (n = 0; n < TEST_LENGTH; ++n)
+         {
+             jd[0] += 1.0;
+             
+             for (p = 0; p < 8; ++p)
+             {
+                 planetpv(jd,p,pv);
+                 radecdist(pv,position);
+             }
+         }
+     }
+ 
+     // get final time
+ 
+     // report runtime
+     
+     fflush(stdout);
+     
+     return 0;
+ }


Index: llvm-test/SingleSource/Benchmarks/CoyoteBench/fftbench.cpp
diff -c /dev/null llvm-test/SingleSource/Benchmarks/CoyoteBench/fftbench.cpp:1.1
*** /dev/null	Sat Mar  4 16:35:30 2006
--- llvm-test/SingleSource/Benchmarks/CoyoteBench/fftbench.cpp	Sat Mar  4 16:35:18 2006
***************
*** 0 ****
--- 1,699 ----
+ /*
+     fftbench
+     Standard C++ version
+     10 May 2003
+ 
+     Written by Scott Robert Ladd.
+     No rights reserved. This is public domain software, for use by anyone.
+ 
+     A number-crunching benchmark that can be used as a fitness test for
+     evolving optimal compiler options via genetic algorithm.
+ 
+     A simplified version of the FFT component from my standard libcoyote
+     library, this benchmark test FFT performance on complex<double> values.
+ 
+     Note that the code herein is design for the purpose of testing 
+     computational performance; error handling and other such "niceties"
+     is virtually non-existent.
+ 
+     Actual benchmark results can be found at:
+             http://www.coyotegulch.com
+ 
+     Please do not use this information or algorithm in any way that might
+     upset the balance of the universe or to produce imaginary friends for
+     imaginary numbers.
+ */
+ 
+ #include <cmath>
+ #include <complex>
+ #include <ctime>
+ #include <cstring>
+ #include <iostream>
+ #include <iomanip>
+ #include <stdexcept>
+ 
+ using namespace std;
+ 
+ //---------------------------------------------------------------------------
+ // number of loops to perform
+ #if defined(ACOVEA)
+ #if defined(arch_pentium4)
+ static const int TEST_SIZE   =   262144;
+ #else
+ static const int TEST_SIZE   =   131072;
+ #endif
+ #else
+ static const int TEST_SIZE   =  2097152;
+ #endif
+ 
+ //---------------------------------------------------------------------------
+ // embedded random number generator; ala Park and Miller
+ static double random_double()
+ {
+     static       long seed = 1325;
+     static const long IA   = 16807;
+     static const long IM   = 2147483647;
+     static const double AM = 4.65661287525E-9;
+     static const long IQ   = 127773;
+     static const long IR   = 2836;
+     static const long MASK = 123459876;
+ 
+     long k;
+     double result;
+     
+     seed ^= MASK;
+     k = seed / IQ;
+     seed = IA * (seed - k * IQ) - IR * k;
+     
+     if (seed < 0)
+         seed += IM;
+     
+     result = AM * seed;
+     seed ^= MASK;
+     
+     return result;
+ }
+ 
+ //---------------------------------------------------------------------------
+ template <class T>
+ class polynomial
+ {
+     public:
+         // creation constructor, uninitialized (coefficients will be unknown values)
+         polynomial(size_t degree);
+     
+         // creation constructor, with initialization from array
+         polynomial(size_t degree, const T * coefficients);
+     
+         // creation constructor, with single-value initialization constructor
+         polynomial(size_t degree, const T & value);
+     
+         // copy constructor
+         polynomial(const polynomial<T> & source);
+         
+         // destructor
+         virtual ~polynomial();
+         
+         // assignment
+         polynomial<T> & operator = (const polynomial<T> & source);
+         
+         // initialize to specific value
+         void initialize(const T & value = T(0));
+     
+         // increase polynomial length
+         polynomial<T> & stretch(size_t degrees);
+         
+         // interrogate for degree
+         size_t degree() const
+         {
+             return m_degree;
+         }
+         
+         // get specific coefficient
+         T get(size_t term) const;
+         
+         T & operator [] (size_t term);
+         
+         // evaluate for a specific value
+         T operator() (const T & x) const;
+         
+         // unitary operators
+         polynomial<T> operator - () const;
+         polynomial<T> operator + () const;
+         
+         // binary mathematical operators
+         polynomial<T> operator + (const polynomial<T> & poly) const;
+         polynomial<T> operator - (const polynomial<T> & poly) const;
+         
+         // constant required by FFT routines
+         static const complex<T> PI2I;
+ 
+         // returns largest power of two that holds n
+         static size_t log2(size_t n);
+ 
+         // reverses a sequence of bits
+         static size_t flip_bits(size_t k, size_t bits);
+ 
+         // stretches the length of a polynomial to a power of two
+         size_t stretch_fft();
+ 
+         // performs a reverse-bit copy of a polynomial<T> into a new polynomial< complex<T> >
+         static polynomial< complex<T> > bit_reverse(const polynomial<T> & poly);
+         static polynomial< complex<T> > bit_reverse(const polynomial< complex<T> > & poly);
+ 
+         // Fast Fourier Transform of polynomial<T> to polynomial< complex<T> >
+         static polynomial< complex<T> > fft(const polynomial<T> & poly);
+ 
+         // inverse FFT of polynomial< complex<T> > to polynomial<T>
+         static polynomial< complex<T> > inverse_fft(const polynomial< complex<T> > & poly);
+ 
+         // multiplication via FFT
+         polynomial<T> operator * (const polynomial<T> & poly) const;
+         
+         // shorthand mathematical operators
+         polynomial<T> & operator += (const polynomial<T> & poly);
+         polynomial<T> & operator -= (const polynomial<T> & poly);
+         polynomial<T> & operator *= (const polynomial<T> & poly);
+         
+     protected:
+         // coefficients
+         T * m_coeff;
+     
+         // number of terms
+         size_t m_degree;
+         
+         // acquire resources
+         void acquire();
+         
+         // release resources
+         void release();
+         
+         // deep copy
+         void deep_copy(const T * source);
+ };
+ 
+ // acquire resources
+ template <typename T>
+ inline void polynomial<T>::acquire()
+ {
+     m_coeff = new T [m_degree];
+ }
+ 
+ // release resources
+ template <typename T>
+ inline void polynomial<T>::release()
+ {
+     delete [] m_coeff;
+ }
+ 
+ // deep copy
+ template <typename T>
+ inline void polynomial<T>::deep_copy(const T * source)
+ {
+     for (size_t n = 0; n < m_degree; ++n)
+         m_coeff[n] = source[n];
+ }
+ 
+ // initialize to specific value
+ template <typename T>
+ inline void polynomial<T>::initialize(const T & value)
+ {
+     for (size_t n = 0; n < m_degree; ++n)
+         m_coeff[n] = value;
+ }
+     
+ // creation constructor, uninitialized (coefficients will be unknown values)
+ template <typename T>
+ polynomial<T>::polynomial(size_t degree)
+   : m_coeff(NULL),
+     m_degree(degree)
+ {
+     acquire();
+ }
+ 
+ // creation constructor, with possible initialization from array
+ template <typename T>
+ polynomial<T>::polynomial(size_t degree, const T * coefficients)
+   : m_coeff(NULL),
+     m_degree(degree)
+ {
+     acquire();
+     
+     if (coefficients != NULL)
+         deep_copy(coefficients);
+ }
+ 
+ // creation constructor, with single-value initialization constructor
+ template <typename T>
+ polynomial<T>::polynomial(size_t degree, const T & value)
+   : m_coeff(NULL),
+     m_degree(degree)
+ {
+     acquire();
+     initialize(value);
+ }
+     
+ // copy constructor
+ template <typename T>
+ polynomial<T>::polynomial(const polynomial<T> & source)
+   : m_coeff(NULL),
+     m_degree(source.m_degree)
+ {
+     acquire();
+     deep_copy(source.m_coeff);
+ }
+ 
+ // destructor
+ template <typename T>
+ polynomial<T>::~polynomial()
+ {
+     release();
+ }
+ 
+ // assignment
+ template <typename T>
+ polynomial<T> & polynomial<T>::operator = (const polynomial<T> & source)
+ {
+     if (m_degree != source.m_degree)
+     {
+         release();
+     
+         m_degree = source.m_degree;
+         acquire();
+     }
+     
+     deep_copy(source.m_coeff);
+     
+     return *this;
+ }
+ 
+ // increase polynomial length
+ template <typename T>
+ polynomial<T> & polynomial<T>::stretch(size_t degrees)
+ {
+     if (degrees != 0)
+     {
+         T * old_coeff = m_coeff;
+         size_t old_degree = m_degree;
+     
+         m_degree += degrees;
+         acquire();
+         
+         size_t n = 0;
+         
+         for (; n < old_degree; ++n)
+             m_coeff[n] = old_coeff[n];
+         
+         for (; n < m_degree; ++n)
+             m_coeff[n] = T(0);
+     }
+     
+     return *this;
+ }
+ 
+ // get specific coefficient
+ template <typename T>
+ T polynomial<T>::get(size_t term) const
+ {
+     return m_coeff[term];
+ }
+ 
+ template <typename T>
+ T & polynomial<T>::operator [] (size_t term)
+ {
+     return m_coeff[term];
+ }
+ 
+ // evaluate for a specific value
+ template <typename T>
+ T polynomial<T>::operator() (const T & x) const
+ {
+     // using Horner's Rule
+     T y = m_coeff[m_degree - 1];
+     
+     size_t i = m_degree - 2;
+     
+     while (true)
+     {
+         y = x * y + m_coeff[i];
+         
+         if (i > 0)
+             --i;
+         else
+             break;
+     }
+     
+     return y;
+ }
+ 
+ // unitary operators
+ template <typename T>
+ polynomial<T> polynomial<T>::operator - () const
+ {
+     polynomial<T> result(m_degree); // uninitialized
+     
+     for (size_t n = 0; n < m_degree; ++n)
+         result.m_coeff[n] = -m_coeff[n];
+     
+     return result;
+ }
+ 
+ template <typename T>
+ polynomial<T> polynomial<T>::operator + () const
+ {
+     return polynomial<T>(*this); 
+ }
+ 
+ // binary mathematical operators
+ template <typename T>
+ polynomial<T> polynomial<T>::operator + (const polynomial<T> & poly) const
+ {
+     if (m_degree >= poly.m_degree)
+     {
+         polynomial<T> result(*this); 
+     
+         for (size_t n = 0; n < m_degree; ++n)
+             result.m_coeff[n] += poly.m_coeff[n];
+     
+         return result;
+     }
+     else
+     {
+         polynomial<T> result(poly); 
+     
+         for (size_t n = 0; n < m_degree; ++n)
+             result.m_coeff[n] += m_coeff[n];
+     
+         return result;
+     }
+ }
+ 
+ template <typename T>
+ polynomial<T> polynomial<T>::operator - (const polynomial<T> & poly) const
+ {
+     if (m_degree >= poly.m_degree)
+     {
+         polynomial<T> result(*this); 
+     
+         for (size_t n = 0; n < m_degree; ++n)
+             result.m_coeff[n] -= poly.m_coeff[n];
+     
+         return result;
+     }
+     else
+     {
+         polynomial<T> result(poly); 
+     
+         for (size_t n = 0; n < m_degree; ++n)
+             result.m_coeff[n] -= m_coeff[n];
+     
+         return result;
+     }
+ }
+ 
+ // constant required by FFT routines
+ template <typename T>
+ const complex<T> polynomial<T>::PI2I(0.0,6.283185307179586);
+ 
+ // returns largest power of two that holds n
+ template <typename T>
+ size_t polynomial<T>::log2(size_t n)
+ {
+     // returns 1 if n == 0!
+     size_t x = 1, c = 0;
+ 
+     while (x < n)
+     {
+         ++c;
+         x <<= 1;
+ 
+         if (x == 0)
+             break;
+     }
+ 
+     return c;
+ }
+ 
+ // reverses a sequence of bits
+ template <typename T>
+ size_t polynomial<T>::flip_bits(size_t k, size_t bits)
+ {
+     size_t lm = 1 << (bits - 1);
+     size_t rm = 1;
+     size_t  r = 0;
+ 
+     while (lm)
+     {
+         if (k & rm)
+             r |= (lm);
+ 
+         lm >>= 1;
+         rm <<= 1;
+     }
+ 
+     return r;
+ }
+ 
+ // stretches the length of a polynomial to a power of two
+ template <class T>
+ size_t polynomial<T>::stretch_fft()
+ {
+     size_t n = 1;
+ 
+     while (true)
+     {
+         if (m_degree <= n)
+             break;
+ 
+         n <<= 1;
+ 
+         if (n == 0)
+             throw overflow_error("overflow in fft polynomial stretch");
+     }
+ 
+     n <<= 1;
+     n -= m_degree;
+ 
+     if (n > 0)
+         stretch(n);
+ 
+     return n;
+ }
+ 
+ // performs a reverse-bit copy of a polynomial<T> into a new polynomial< complex<T> >
+ template <typename T>
+ polynomial< complex<T> > polynomial<T>::bit_reverse(const polynomial<T> & poly)
+ {
+     size_t b = log2(poly.degree());
+ 
+     polynomial< complex<T> > result(poly.degree());
+ 
+     for (size_t n = 0; n < poly.degree(); ++n)
+         result[flip_bits(n,b)] = poly.get(n);
+ 
+     return result;
+ }
+ 
+ // performs a reverse-bit copy of a polynomial<T> into a new polynomial< complex<T> >
+ template <typename T>
+ polynomial< complex<T> > polynomial<T>::bit_reverse(const polynomial< complex<T> > & poly)
+ {
+     size_t b = log2(poly.degree());
+     
+     polynomial< complex<T> > result(poly.degree());
+ 
+     for (size_t n = 0; n < poly.degree(); ++n)
+         result[flip_bits(n,b)] = poly.get(n);
+ 
+     return result;
+ }
+ 
+ // Fast Fourier Transform of polynomial<T> to polynomial< complex<T> >
+ template <typename T>
+ polynomial< complex<T> > polynomial<T>::fft(const polynomial<T> & poly)
+ {
+     size_t nl = log2(poly.degree());
+     size_t j, k, m, m2, s;
+     complex<T> wm, w, t, u;
+     
+     polynomial< complex<T> > result = bit_reverse(poly);
+ 
+     m  = 2;
+     m2 = 1;
+ 
+     for (s = 0; s < nl; ++s)
+     {
+         wm = exp(PI2I / complex<T>(m));
+         w  = 1.0;
+ 
+         for (j = 0; j <= (m2 - 1); ++j)
+         {
+             for (k = j; k <= poly.degree() - 1; k += m)
+             {
+                 t = w * result[k + m2];
+                 u = result[k];
+                 result[k] = u + t;
+                 result[k + m2] = u - t;
+             }
+ 
+             w *= wm;
+         }
+ 
+         m  <<= 1;
+         m2 <<= 1;
+     }
+ 
+     return result;
+ }
+ 
+ // inverse FFT of polynomial< complex<T> > to polynomial<T>
+ template <typename T>
+ polynomial< complex<T> > polynomial<T>::inverse_fft(const polynomial< complex<T> > & poly)
+ {
+     size_t nl = log2(poly.degree());
+     size_t j, k, m, m2, s;
+     complex<T> wm, w, t, u;
+     
+     polynomial< complex<T> > result = bit_reverse(poly);
+ 
+     m  = 2;
+     m2 = 1;
+ 
+     for (s = 0; s < nl; ++s)
+     {
+         wm = exp(-PI2I / complex<T>(m));
+         w  = 1.0;
+ 
+         for (j = 0; j <= (m2 - 1); ++j)
+         {
+             for (k = j; k <= poly.degree() - 1; k += m)
+             {
+                 t = w * result[k + m2];
+                 u = result[k];
+                 result[k] = u + t;
+                 result[k + m2] = u - t;
+             }
+ 
+             w *= wm;
+         }
+ 
+         m  <<= 1;
+         m2 <<= 1;
+     }
+ 
+     for (j = 0; j < poly.degree(); ++j)
+         result[j] /= double(poly.degree());
+ 
+     return result;
+ }
+ 
+ // multiplication by FFT
+ template <typename T>
+ polynomial<T> polynomial<T>::operator * (const polynomial<T> & poly) const
+ {
+ #if defined(BRUTE_FORCE)
+     // brute force algorithm
+     if (m_degree != poly.m_degree)
+         throw domain_error("can not multiply two polynomials with different lengths");
+     
+     polynomial<T> result(2 * m_degree - 1, T(0));
+     
+     for (size_t n1 = 0; n1 < m_degree; ++n1)
+         for (size_t n2 = 0; n2 < m_degree; ++n2)
+             result.m_coeff[n1 + n2] += m_coeff[n1] * poly.m_coeff[n2];
+ #else
+     // duplicate p1 and p2 to preserve original objects
+     polynomial<T> a1(*this);
+     polynomial<T> a2(poly);
+     
+     // expand polynomials to next-largest power of two
+     if (a1.degree() > a2.degree())
+         a2.stretch(a1.stretch_fft());
+     else
+         a1.stretch(a2.stretch_fft());
+     
+     // FFT polynomials
+     polynomial< complex<T> > dft1 = fft(a1);
+     polynomial< complex<T> > dft2 = fft(a2);
+ 
+     // multiply coefficients
+     size_t n2 = a1.degree();
+     
+     for (size_t k = 0; k < n2; ++k)
+         dft1[k] *= dft2[k];
+ 
+     // inverse DFT to obtain result
+     dft2 = inverse_fft(dft1);
+ 
+     // convert back to complex<T>
+     --n2;
+     polynomial<T> result(n2);
+ 
+     for (size_t k = 0; k < n2; ++k)
+         result[k] = dft2[k].real();
+ #endif
+     
+     return result;
+ }
+ 
+ // shorthand mathematical operators
+ template <typename T>
+ polynomial<T> & polynomial<T>::operator += (const polynomial<T> & poly)
+ {
+     size_t length = (m_degree <= poly.m_degree) ? m_degree : poly.m_degree;
+     
+     for (size_t n = 0; n < length; ++n)
+         m_coeff[n] += poly.m_coeff[n];
+     
+     return *this;
+ }
+ 
+ template <typename T>
+ polynomial<T> & polynomial<T>::operator -= (const polynomial<T> & poly)
+ {
+     size_t length = (m_degree <= poly.m_degree) ? m_degree : poly.m_degree;
+     
+     for (size_t n = 0; n < length; ++n)
+         m_coeff[n] -= poly.m_coeff[n];
+     
+     return *this;
+ }
+ 
+ template <typename T>
+ polynomial<T> & polynomial<T>::operator *= (const polynomial<T> & poly)
+ {
+     *this = (*this) * poly;
+ }
+ 
+ //---------------------------------------------------------------------------
+ // Entry point
+ int main(int argc, char ** argv)
+ {
+     // are we testing using a genetic algorithm?
+     bool ga_testing = false;
+     
+     if (argc > 1)
+     {
+         for (int i = 1; i < argc; ++i)
+         {
+             if (!strcmp(argv[1],"-ga"))
+             {
+                 ga_testing = true;
+                 break;
+             }
+         }
+     }
+     
+     // generate random polynomials
+     polynomial<double> poly1(TEST_SIZE);
+     polynomial<double> poly2(TEST_SIZE);
+     polynomial<double> poly3(TEST_SIZE * 2 - 1);
+     
+     for (int n = 0; n < TEST_SIZE; ++n)
+     {
+         poly1[n] = random_double();
+         poly2[n] = random_double();
+     }
+     
+     // get starting time    
+     //struct timespec start, stop;
+     //clock_gettime(CLOCK_REALTIME,&start);
+     
+     // what we're timing
+     poly3 = poly1 * poly2;
+     
+     // get final time
+     //clock_gettime(CLOCK_REALTIME,&stop);        
+     double run_time = 0;
+     //(stop.tv_sec - start.tv_sec) + (double)(stop.tv_nsec - start.tv_nsec) / 1000000000.0;
+ 
+     // report runtime
+     if (ga_testing)
+         cout << run_time;
+     else        
+         cout << "\nfftbench (Std. C++) run time: " << run_time << "\n\n";
+     
+     cout.flush();
+     
+     return 0;
+ }


Index: llvm-test/SingleSource/Benchmarks/CoyoteBench/huffbench.c
diff -c /dev/null llvm-test/SingleSource/Benchmarks/CoyoteBench/huffbench.c:1.1
*** /dev/null	Sat Mar  4 16:35:30 2006
--- llvm-test/SingleSource/Benchmarks/CoyoteBench/huffbench.c	Sat Mar  4 16:35:18 2006
***************
*** 0 ****
--- 1,488 ----
+ /*
+     huffbench
+     Standard C version
+     17 April 2003
+ 
+     Written by Scott Robert Ladd (scott at coyotegulch.com)
+     No rights reserved. This is public domain software, for use by anyone.
+ 
+     A data compression benchmark that can also be used as a fitness test 
+     for evolving optimal compiler options via genetic algorithm.
+ 
+     This program implements the Huffman compression algorithm. The code
+     is not the tightest or fastest possible C code; rather, it is a 
+     relatively straight-forward implementation of the algorithm,
+     providing opportunities for an optimizer to "do its thing."
+     
+     Note that the code herein is design for the purpose of testing 
+     computational performance; error handling and other such "niceties"
+     is virtually non-existent.
+ 
+     Actual benchmark results can be found at:
+             http://www.coyotegulch.com
+ 
+     Please do not use this information or algorithm in any way that might
+     upset the balance of the universe or otherwise cause the creation of
+     singularities.
+ */
+ 
+ #include <time.h>
+ #include <string.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <stddef.h>
+ #include <stdbool.h>
+ #include <memory.h>
+ #include <math.h>
+ 
+ // embedded random number generator; ala Park and Miller
+ static       long seed = 1325;
+ static const long IA   = 16807;
+ static const long IM   = 2147483647;
+ static const long IQ   = 127773;
+ static const long IR   = 2836;
+ static const long MASK = 123459876;
+ 
+ // return index between 0 and 3
+ static size_t random4()
+ {
+     long k;
+     size_t result;
+     
+     seed ^= MASK;
+     k = seed / IQ;
+     seed = IA * (seed - k * IQ) - IR * k;
+     
+     if (seed < 0L)
+         seed += IM;
+     
+     result = (size_t)(seed % 32L);
+     seed ^= MASK;
+     
+     return result;
+ }
+ 
+ #if defined(ACOVEA)
+ #if defined(arch_pentium4)
+ static const int NUM_LOOPS =        5;
+ static const int TEST_SIZE = 10000000;
+ #else
+ static const int NUM_LOOPS =        2;
+ static const int TEST_SIZE =  5000000;
+ #endif
+ #else
+ static const int NUM_LOOPS =       30;
+ static const int TEST_SIZE = 10000000;
+ #endif
+ 
+ typedef unsigned long bits32;
+ typedef unsigned char byte;
+ 
+ // compressed (encoded) data
+ 
+ byte * generate_test_data(size_t n)
+ {
+     char * codes = "ABCDEFGHIJKLMNOPQRSTUVWXYZ012345";
+     
+     byte * result = (byte *)malloc(n);
+     byte * ptr = result;
+     
+     int i;
+     for (i = 0; i < n; ++i)
+     {
+         *ptr = (byte)codes[random4()];
+         ++ptr;
+     }
+     
+     return result;
+ }
+ 
+ // utility function for processing compression trie
+ static void heap_adjust(size_t * freq, size_t * heap, int n, int k)
+ {
+     // this function compares the values in the array
+     // 'freq' to order the elements of 'heap' according
+     // in an inverse heap. See the chapter on priority
+     // queues and heaps for more explanation.
+     int j;
+ 
+     --heap;
+ 
+     int v = heap[k];
+ 
+     while (k <= (n / 2))
+     {
+         j = k + k;
+     
+         if ((j < n) && (freq[heap[j]] > freq[heap[j+1]]))
+             ++j;
+     
+         if (freq[v] < freq[heap[j]])
+             break;
+     
+         heap[k] = heap[j];
+         k = j;
+     }
+ 
+     heap[k] = v;
+ }
+ 
+ // Huffman compression/decompression function
+ void compdecomp(byte * data, size_t data_len)
+ {
+     size_t i, j, n, mask;
+     bits32 k, t;
+     byte   c;
+     byte * cptr;
+     byte * dptr = data;
+     
+     /*
+         COMPRESSION
+     */
+ 
+     // allocate data space
+     byte * comp = (byte *)malloc(data_len + 1);
+     
+     size_t freq[512];   // allocate frequency table
+     size_t heap[256];   // allocate heap
+     int    link[512];   // allocate link array
+     bits32 code[256];   // huffman codes
+     byte   clen[256];   // bit lengths of codes
+ 
+     memset(comp,0,sizeof(byte)   * (data_len + 1));
+     memset(freq,0,sizeof(size_t) * 512);
+     memset(heap,0,sizeof(size_t) * 256);
+     memset(link,0,sizeof(int)    * 512);
+     memset(code,0,sizeof(bits32) * 256);
+     memset(clen,0,sizeof(byte)   * 256);
+ 
+     // count frequencies
+     for (i = 0; i < data_len; ++i)
+     {
+         ++freq[(size_t)(*dptr)];
+         ++dptr;
+     }
+ 
+     // create indirect heap based on frequencies
+     n = 0; 
+ 
+     for (i = 0; i < 256; ++i)
+     {
+         if (freq[i])
+         {
+             heap[n] = i;
+             ++n;
+         }
+     }
+ 
+     for (i = n; i > 0; --i)
+         heap_adjust(freq,heap,n,i);
+ 
+     // generate a trie from heap
+     size_t temp;
+ 
+     // at this point, n contains the number of characters
+     // that occur in the data array
+     while (n > 1)
+     {
+         // take first item from top of heap
+         --n;
+         temp    = heap[0];
+         heap[0] = heap[n];
+     
+         // adjust the heap to maintain properties
+         heap_adjust(freq,heap,n,1);
+     
+         // in upper half of freq array, store sums of 
+         // the two smallest frequencies from the heap
+         freq[256 + n] = freq[heap[0]] + freq[temp];
+         link[temp]    =  256 + n; // parent
+         link[heap[0]] = -256 - n; // left child
+         heap[0]       =  256 + n; // right child
+     
+         // adjust the heap again
+         heap_adjust(freq,heap,n,1);
+     }
+ 
+     link[256 + n] = 0;
+ 
+     // generate codes
+     size_t m, x, maxx = 0, maxi = 0;
+     int l;
+ 
+     for (m = 0; m < 256; ++m)
+     {
+         if (!freq[m]) // character does not occur
+         {
+             code[m] = 0;
+             clen[m] = 0;
+         }
+         else
+         {
+             i = 0;       // length of current code
+             j = 1;       // bit being set in code
+             x = 0;       // code being built
+             l = link[m]; // link in trie
+         
+             while (l) // while not at end of trie
+             {
+                 if (l < 0) // left link (negative)
+                 {
+                     x +=  j; // insert 1 into code
+                     l  = -l; // reverse sign
+                 }
+             
+                 l  = link[l]; // move to next link
+                 j <<= 1;      // next bit to be set
+                 ++i;          // increment code length
+             }
+         
+             code[m] = (unsigned long)x; // save code
+             clen[m] = (unsigned char)i; // save code len
+         
+             // keep track of biggest key
+             if (x > maxx)
+                 maxx = x;
+         
+             // keep track of longest key
+             if (i > maxi)
+                 maxi = i;
+         }
+     }
+ 
+     // make sure longest codes fit in unsigned long-bits
+     if (maxi > (sizeof(unsigned long) * 8))
+     {
+         fprintf(stderr,"error: bit code overflow\n");
+         exit(1);
+     }
+ 
+     // encode data
+     size_t comp_len =  0;   // number of data_len output
+     char   bout =  0;   // byte of encoded data
+     int    bit  = -1;   // count of bits stored in bout
+     dptr = data;
+     
+     // watch for one-value file!
+     if (maxx == 0)
+     {
+         fprintf(stderr,"error: file has only one value!\n");
+         exit(1);
+     }
+ 
+     for (j = 0; j < data_len; ++j)
+     {
+         // start copying at first bit of code
+         mask = 1 << (clen[(*dptr)] - 1);
+     
+         // copy code bits
+         for (i = 0; i < clen[(*dptr)]; ++i)
+         {
+             if (bit == 7)
+             {
+                 // store full output byte
+                 comp[comp_len] = bout;
+                 ++comp_len;
+             
+                 // check for output longer than input!
+                 if (comp_len == data_len)
+                 {
+                     fprintf(stderr,"error: no compression\n");
+                     exit(1);
+                 }
+             
+                 bit  = 0;
+                 bout = 0;
+             }
+             else
+             {
+                 // move to next bit
+                 ++bit;
+                 bout <<= 1;
+             }
+         
+             if (code[(*dptr)] & mask)
+                 bout |= 1;
+         
+             mask >>= 1;
+         }
+     
+         ++dptr;
+     }
+ 
+     // output any incomplete data_len and bits
+     bout <<= (7 - bit);
+     comp[comp_len] = bout;
+     ++comp_len;
+     
+     // printf("data len = %u\n",data_len);
+     // printf("comp len = %u\n",comp_len);
+     
+     /*
+         DECOMPRESSION
+     */
+ 
+     // allocate heap2
+     bits32 heap2[256];
+     
+     // allocate output character buffer
+     char outc[256];
+     
+     // initialize work areas
+     memset(heap2,0,256 * sizeof(bits32));
+     
+     // create decode table as trie heap2
+     char * optr = outc;
+     
+     for (j = 0; j < 256; ++j)
+     {
+         (*optr) = (char)j;
+         ++optr;
+         
+         // if code exists for this byte
+         if (code[j] | clen[j])
+         {
+             // begin at first code bit
+             k = 0;
+             mask = 1 << (clen[j] - 1);
+             
+             // find proper node, using bits in
+             // code as path.
+             for (i = 0; i < clen[j]; ++i)
+             {
+                 k = k * 2 + 1; // right link
+                 
+                 if (code[j] & mask)
+                     ++k; // go left
+                 
+                 mask >>= 1; // next bit
+             }
+             
+             heap2[j] = k; // store link in heap2
+         }
+     }
+     
+     // sort outc based on heap2
+     for (i = 1; i < 256; ++i)
+     {
+         t = heap2[i];
+         c = outc[i];
+         j = i;
+         
+         while ((j) && (heap2[j-1] > t))
+         {
+             heap2[j] = heap2[j - 1];
+             outc[j] = outc[j - 1];
+             --j;
+         }
+         
+         heap2[j] = t;
+         outc[j] = c;
+     }
+     
+     // find first character in table
+     for (j = 0; heap2[j] == 0; ++j) ;
+     
+     // decode data
+     k    = 0; // link in trie
+     i    = j; 
+     mask = 0x80;
+     n    = 0;
+     cptr = comp;
+     dptr = data;
+     
+     while (n < data_len)
+     {
+         k = k * 2 + 1; // right link
+         
+         if ((*cptr) & mask)
+             ++k; // left link if bit on
+         
+         // search heap2 until link >= k
+         while (heap2[i] < k)
+             ++i;
+         
+         // code matches, character found
+         if (k == heap2[i])
+         {
+             (*dptr) = outc[i];
+             ++dptr;
+             ++n;
+             k = 0;
+             i = j;
+         }
+         
+         // move to next bit
+         if (mask > 1)
+             mask >>= 1;
+         else // code extends into next byte
+         {
+             mask = 0x80;
+             ++cptr;
+         }
+     }
+     
+     // remove work areas
+     free(comp);
+ }
+ 
+ int main(int argc, char ** argv)
+ {
+     int i;
+     
+     // do we have verbose output?
+     bool ga_testing = false;
+     
+     if (argc > 1)
+     {
+         for (i = 1; i < argc; ++i)
+         {
+             if (!strcmp(argv[1],"-ga"))
+             {
+                 ga_testing = true;
+                 break;
+             }
+         }
+     }
+     
+     // initialization
+     byte * test_data = generate_test_data(TEST_SIZE);
+     
+     /*
+     FILE * before = fopen("before","wb");
+     fwrite(test_data,1,TEST_SIZE,before);
+     fclose(before);
+     */
+             
+     // get starting time    
+     //struct timespec start, stop;
+     //clock_gettime(CLOCK_REALTIME,&start);
+ 
+     // what we're timing
+     for (i = 0; i < NUM_LOOPS; ++i)
+         compdecomp(test_data,TEST_SIZE);
+     
+     // calculate run time
+     //clock_gettime(CLOCK_REALTIME,&stop);        
+     double run_time = 0; //(stop.tv_sec - start.tv_sec) + (double)(stop.tv_nsec - start.tv_nsec) / 1000000000.0;
+ 
+     /*
+     FILE * after = fopen("after","wb");
+     fwrite(test_data,1,TEST_SIZE,after);
+     fclose(after);
+     */
+     
+     // release resources
+     free(test_data);
+ 
+     // report runtime
+     if (ga_testing)
+         fprintf(stdout,"%f",run_time);
+     else        
+         fprintf(stdout,"\nhuffbench (Std. C) run time: %f\n\n",run_time);
+     
+     fflush(stdout);
+     
+     // done
+     return 0;
+ }


Index: llvm-test/SingleSource/Benchmarks/CoyoteBench/lpbench.c
diff -c /dev/null llvm-test/SingleSource/Benchmarks/CoyoteBench/lpbench.c:1.1
*** /dev/null	Sat Mar  4 16:35:30 2006
--- llvm-test/SingleSource/Benchmarks/CoyoteBench/lpbench.c	Sat Mar  4 16:35:18 2006
***************
*** 0 ****
--- 1,408 ----
+ /*
+     lpbench
+     Standard C version
+     17 April 2003
+ 
+     Written by Scott Robert Ladd (scott at coyotegulch.com)
+     No rights reserved. This is public domain software, for use by anyone.
+ 
+     A number-crunching benchmark that can be used as a fitness test for
+     evolving optimal compiler options via genetic algorithm.
+ 
+     This program is a modernized version of the classic Linpack 
+     benchmark. My implementation is based on Bonnie Toy's translation
+     of the original Fortran source code, including modifications by Jack
+     Dongarra and Roy Longbottom.
+     
+     I've updated the code as follows:
+     
+       1) ANSI C code
+       2) A timing mechanism based on the POSIX clock_gettime() function
+       3) Replaces C's rather weak random number generator
+       4) Compute results for a 2500x2500 (6.25 million element) matrix
+ 
+     Note that the code herein is design for the purpose of testing 
+     computational performance; error handling and other such "niceties"
+     is virtually non-existent.
+ 
+     Actual benchmark results can be found at:
+             http://www.coyotegulch.com
+ 
+     Please do not use this information or algorithm in any way that might
+     upset the balance of the universe or otherwise cause a disturbance in
+     the space-time continuum.
+ */
+ 
+ #include <time.h>
+ #include <string.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <math.h>
+ #include <stdbool.h>
+ 
+ // embedded random number generator; ala Park and Miller
+ static       long seed = 1325;
+ static const long IA   = 16807;
+ static const long IM   = 2147483647;
+ static const double AM = 4.65661287525E-10;
+ static const long IQ   = 127773;
+ static const long IR   = 2836;
+ static const long MASK = 123459876;
+ 
+ static double random_double()
+ {
+     long k;
+     double result;
+     
+     seed ^= MASK;
+     k = seed / IQ;
+     seed = IA * (seed - k * IQ) - IR * k;
+     
+     if (seed < 0)
+         seed += IM;
+     
+     result = AM * seed;
+     seed ^= MASK;
+     
+     return result;
+ }
+ 
+ #if defined(ACOVEA)
+ #if defined(arch_pentium4)
+ static const int N   = 1000;
+ static const int NM1 =  999; // N - 1
+ static const int NP1 = 1001; // N + 1
+ #else
+ static const int N   =  600;
+ static const int NM1 =  599; // N - 1
+ static const int NP1 =  601; // N + 1
+ #endif
+ #else
+ static const int N   = 2000;
+ static const int NM1 = 1999; // N - 1
+ static const int NP1 = 2001; // N + 1
+ #endif
+ 
+ // benchmark code
+ void matgen (double ** a, double * b)
+ {
+     // fill arrays
+     int i, j;
+ 
+     for (i = 0; i < N; i++)
+         for (j = 0; j < N; j++)
+             a[j][i] = random_double();
+ 
+     for (i = 0; i < N; i++)
+         b[i] = 0.0;
+ 
+     for (j = 0; j < N; j++)
+         for (i = 0; i < N; i++)
+             b[i] += a[j][i];
+ }
+ 
+ // finds the index of element having max. absolute value.
+ int idamax(int n, double * dx, int dx_off, int incx)
+ {
+     double dmax, dtemp;
+     int i, ix, itemp = 0;
+ 
+     if (n < 1) 
+         itemp = -1;
+     else
+     {
+         if (n ==1)
+             itemp = 0;
+         else
+         {
+             if (incx != 1)
+             {
+                 // code for increment not equal to 1
+                 dmax = fabs(dx[dx_off]);
+                 ix = 1 + incx;
+ 
+                 for (i = 1; i < n; i++)
+                 {
+ 	                dtemp = fabs(dx[ix + dx_off]);
+ 
+                     if (dtemp > dmax) 
+                     {
+ 	                    itemp = i;
+ 	                    dmax = dtemp;
+                     }
+ 
+                     ix += incx;
+                 }
+             }
+             else
+             {
+                 // code for increment equal to 1
+                 itemp = 0;
+                 dmax = fabs(dx[dx_off]);
+ 
+                 for (i = 1; i < n; i++)
+                 {
+ 	                dtemp = fabs(dx[i + dx_off]);
+ 
+ 	                if (dtemp > dmax)
+                     {
+                         itemp = i;
+                         dmax = dtemp;
+                     }
+                 }
+             }
+         }
+     }
+ 
+     return itemp;
+ }
+     
+ // forms the dot product of two vectors.
+ static double ddot(int n, double * dx, int dx_off, int incx, double * dy, int dy_off, int incy)
+ {
+     int i;
+     double dtemp = 0.0;
+ 
+     if (n > 0)
+     {
+         if (incx != 1 || incy != 1)
+         {
+             // code for unequal increments or equal increments not equal to 1
+             int ix = 0;
+ 	        int iy = 0;
+ 
+             if (incx < 0)
+                 ix = (-n + 1) * incx;
+ 
+ 	        if (incy < 0)
+                 iy = (-n + 1) * incy;
+ 
+             for (i = 0;i < n; i++)
+             {
+                 dtemp += dx[ix + dx_off] * dy[iy + dy_off];
+                 ix    += incx;
+                 iy    += incy;
+             }
+         }
+         else
+         {
+             // code for both increments equal to 1
+             for (i=0; i < n; i++)
+                 dtemp += dx[i + dx_off] * dy[i + dy_off];
+         }
+     }
+ 
+     return(dtemp);
+ }
+ 
+ // scales a vector by a constant.
+ void dscal(int n, double da, double * dx, int dx_off, int incx)
+ {
+     int i;
+ 
+     if (n > 0)
+     {
+         if (incx != 1)
+         {
+             // code for increment not equal to 1
+ 	        int nincx = n * incx;
+ 
+             for (i = 0; i < nincx; i += incx)
+                 dx[i + dx_off] *= da;
+         }
+         else
+         {
+             // code for increment equal to 1
+             for (i = 0; i < n; i++)
+                 dx[i + dx_off] *= da;
+         }
+     }
+ }
+ 
+ //  constant times a vector plus a vector.
+ void daxpy(int n, double da, double * dx, int dx_off, int incx, double * dy, int dy_off, int incy)
+ {
+     int i;
+ 
+     if ((n > 0) && (da != 0))
+     {
+         if (incx != 1 || incy != 1)
+         {
+             // code for unequal increments or equal increments not equal to 1
+             int ix = 0;
+             int iy = 0;
+ 
+             if (incx < 0)
+                 ix = (1 - n) * incx;
+ 
+             if (incy < 0)
+                 iy = (1 - n) * incy;
+ 
+             for (i = 0; i < n; i++)
+             {
+                 dy[iy + dy_off] += da * dx[ix + dx_off];
+                 ix += incx;
+                 iy += incy;
+ 	        }
+ 
+ 	        return;
+         }
+         else
+         {
+             // code for both increments equal to 1
+             for (i = 0; i < n; i++)
+                 dy[i + dy_off] += da * dx[i + dx_off];
+         }
+     }
+ }
+ 
+ // Factors a double precision matrix by gaussian elimination.
+ void dgefa(double ** a, int * ipvt)
+ {
+     double temp;
+     int k, j;
+ 
+     for (k = 0; k < NM1; k++)
+     {
+         double * col_k = a[k];
+         int kp1 = k + 1;
+ 
+         // find l = pivot index
+         int l = idamax(N - k, col_k, k, 1) + k;
+ 	    ipvt[k] = l;
+ 
+     	// zero pivot implies this column already triangularized
+         if (col_k[l] != 0)
+         {
+ 	        // interchange if necessary
+     	    if (l != k)
+             {
+     	        temp     = col_k[l];
+ 	            col_k[l] = col_k[k];
+ 	            col_k[k] = temp;
+             }
+ 
+             // compute multipliers
+             temp = -1.0 / col_k[k];
+             dscal(N - kp1, temp, col_k, kp1, 1);
+ 
+     	    // row elimination with column indexing
+             for (j = kp1; j < N; j++)
+             {
+ 	            double * col_j = a[j];
+ 	            temp = col_j[l];
+ 
+     	        if (l != k)
+                 {
+                     col_j[l] = col_j[k];
+                     col_j[k] = temp;
+                 }
+ 
+                 daxpy(N - kp1, temp, col_k, kp1, 1, col_j, kp1, 1);
+             }
+         }
+     }
+ 
+     ipvt[N - 1] = N - 1;
+ }
+ 
+ //  Solves the double precision system a * x = b  or trans(a) * x = b
+ //  using the factors computed by dgeco or dgefa.
+ void dgesl(double ** a, int * ipvt, double * b)
+ {
+     double t;
+     int k, kb;
+ 
+     // solve  a * x = b.  first solve  l*y = b
+     for (k = 0; k < NM1; k++)
+     {
+         int l = ipvt[k];
+         t = b[l];
+ 
+         if (l != k)
+         {
+             b[l] = b[k];
+             b[k] = t;
+         }
+ 
+         int kp1 = k + 1;
+         daxpy(N - kp1, t, a[k], kp1, 1, b, kp1, 1);
+     }
+ 
+     // now solve  u*x = y
+     for (kb = 0; kb < N; kb++)
+     {
+         k     = N - (kb + 1);
+         b[k] /= a[k][k];
+         t     = -b[k];
+         daxpy(k, t, a[k], 0, 1, b, 0, 1);
+     }
+ }
+ 
+ int main(int argc, char ** argv)
+ {
+     int i;
+     
+     // do we have verbose output?
+     bool ga_testing = false;
+     
+     if (argc > 1)
+     {
+         for (i = 1; i < argc; ++i)
+         {
+             if (!strcmp(argv[1],"-ga"))
+             {
+                 ga_testing = true;
+                 break;
+             }
+         }
+     }
+     
+     double ** a = (double **)malloc(sizeof(double) * N);
+     
+     for (i = 0; i < N; ++i)
+         a[i] = (double *)malloc(sizeof(double) * NP1);
+     
+     double * b = (double *)malloc(sizeof(double) * N);
+     double * x = (double *)malloc(sizeof(double) * N);
+     int * ipvt = (int *)malloc(sizeof(int)    * N);
+ 
+     // calculate operations per timeInSeconds
+     double ops = (2.0 * ((double)N * N * N)) / 3.0 + 2.0 * ((double)N * N);
+ 
+     // generate matrix    
+     matgen(a,b);
+     
+     // get starting time    
+     //struct timespec start, stop;
+     //clock_gettime(CLOCK_REALTIME,&start);
+ 
+     // what we're timing
+     dgefa(a,ipvt);
+     dgesl(a,ipvt,b);
+     
+     // calculate run time
+     //clock_gettime(CLOCK_REALTIME,&stop);        
+     double run_time = 0;//(stop.tv_sec - start.tv_sec) + (double)(stop.tv_nsec - start.tv_nsec) / 1000000000.0;
+     
+     // clean up
+     free(ipvt);
+     free(x);
+     free(b);
+     
+     for (i = 0; i < N; ++i)
+         free(a[i]);
+     
+     free(a);
+ 
+     // report runtime
+     if (ga_testing)
+         fprintf(stdout,"%f",run_time);
+     else        
+         fprintf(stdout,"\nlpbench (Std. C) run time: %f\n\n",run_time);
+     
+     fflush(stdout);
+     
+     // done
+     return 0;
+ }






More information about the llvm-commits mailing list