[llvm-commits] CVS: llvm-test/SingleSource/Benchmarks/CoyoteBench/LICENSE.txt Makefile almabench.c fftbench.cpp huffbench.c lpbench.c
Chris Lattner
lattner at cs.uiuc.edu
Sat Mar 4 14:35:30 PST 2006
Changes in directory llvm-test/SingleSource/Benchmarks/CoyoteBench:
LICENSE.txt added (r1.1)
Makefile added (r1.1)
almabench.c added (r1.1)
fftbench.cpp added (r1.1)
huffbench.c added (r1.1)
lpbench.c added (r1.1)
---
Log message:
add an initial version of this benchmark to the testsuite. This may be updated
in the future if I get a new version from the author
---
Diffs of the changes: (+1981 -0)
LICENSE.txt | 3
Makefile | 5
almabench.c | 378 +++++++++++++++++++++++++++++++
fftbench.cpp | 699 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
huffbench.c | 488 +++++++++++++++++++++++++++++++++++++++++
lpbench.c | 408 ++++++++++++++++++++++++++++++++++
6 files changed, 1981 insertions(+)
Index: llvm-test/SingleSource/Benchmarks/CoyoteBench/LICENSE.txt
diff -c /dev/null llvm-test/SingleSource/Benchmarks/CoyoteBench/LICENSE.txt:1.1
*** /dev/null Sat Mar 4 16:35:28 2006
--- llvm-test/SingleSource/Benchmarks/CoyoteBench/LICENSE.txt Sat Mar 4 16:35:18 2006
***************
*** 0 ****
--- 1,3 ----
+ Written by Scott Robert Ladd.
+ No rights reserved. This is public domain software, for use by anyone.
+
Index: llvm-test/SingleSource/Benchmarks/CoyoteBench/Makefile
diff -c /dev/null llvm-test/SingleSource/Benchmarks/CoyoteBench/Makefile:1.1
*** /dev/null Sat Mar 4 16:35:30 2006
--- llvm-test/SingleSource/Benchmarks/CoyoteBench/Makefile Sat Mar 4 16:35:18 2006
***************
*** 0 ****
--- 1,5 ----
+ LEVEL = ../../..
+ LDFLAGS += -lm
+
+ include $(LEVEL)/SingleSource/Makefile.singlesrc
+
Index: llvm-test/SingleSource/Benchmarks/CoyoteBench/almabench.c
diff -c /dev/null llvm-test/SingleSource/Benchmarks/CoyoteBench/almabench.c:1.1
*** /dev/null Sat Mar 4 16:35:30 2006
--- llvm-test/SingleSource/Benchmarks/CoyoteBench/almabench.c Sat Mar 4 16:35:18 2006
***************
*** 0 ****
--- 1,378 ----
+ /*
+ almabench
+ Standard C version
+ 17 April 2003
+
+ Written by Scott Robert Ladd.
+ No rights reserved. This is public domain software, for use by anyone.
+
+ A number-crunching benchmark that can be used as a fitness test for
+ evolving optimal compiler options via genetic algorithm.
+
+ This program calculates the daily ephemeris (at noon) for the years
+ 2000-2099 using an algorithm developed by J.L. Simon, P. Bretagnon, J.
+ Chapront, M. Chapront-Touze, G. Francou and J. Laskar of the Bureau des
+ Longitudes, Paris, France), as detailed in Astronomy & Astrophysics
+ 282, 663 (1994)
+
+ Note that the code herein is design for the purpose of testing
+ computational performance; error handling and other such "niceties"
+ is virtually non-existent.
+
+ Actual benchmark results can be found at:
+ http://www.coyotegulch.com
+
+ Please do not use this information or algorithm in any way that might
+ upset the balance of the universe or otherwise cause planets to impact
+ upon one another.
+ */
+
+ #include <math.h>
+ #include <time.h>
+ #include <string.h>
+ #include <stdio.h>
+ #include <stdbool.h>
+
+ #define CALC_PI 3.14159265358979323846
+
+ static const double PI = CALC_PI;
+ static const double J2000 = 2451545.0;
+ static const double JCENTURY = 36525.0;
+ static const double JMILLENIA = 365250.0;
+ static const double TWOPI = 2.0 * CALC_PI;
+ static const double A2R = CALC_PI / 648000.0;
+ static const double R2H = 12.0 / CALC_PI;
+ static const double R2D = 180.0 / CALC_PI;
+ static const double GAUSSK = 0.01720209895;
+
+ // number of days to include in test
+ #if defined(ACOVEA)
+ #if defined(arch_pentium4)
+ static const int TEST_LOOPS = 4;
+ #else
+ static const int TEST_LOOPS = 1;
+ #endif
+ #else
+ static const int TEST_LOOPS = 20;
+ #endif
+
+ static const int TEST_LENGTH = 36525;
+
+ // sin and cos of j2000 mean obliquity (iau 1976)
+ static const double sineps = 0.3977771559319137;
+ static const double coseps = 0.9174820620691818;
+
+ static const double amas [8] = { 6023600.0, 408523.5, 328900.5, 3098710.0, 1047.355, 3498.5, 22869.0, 19314.0 };
+
+ // tables giving the mean keplerian elements, limited to t**2 terms:
+ // a semi-major axis (au)
+ // dlm mean longitude (degree and arcsecond)
+ // e eccentricity
+ // pi longitude of the perihelion (degree and arcsecond)
+ // dinc inclination (degree and arcsecond)
+ // omega longitude of the ascending node (degree and arcsecond)
+ static const double a [8][3] =
+ { { 0.3870983098, 0, 0 },
+ { 0.7233298200, 0, 0 },
+ { 1.0000010178, 0, 0 },
+ { 1.5236793419, 3e-10, 0 },
+ { 5.2026032092, 19132e-10, -39e-10 },
+ { 9.5549091915, -0.0000213896, 444e-10 },
+ { 19.2184460618, -3716e-10, 979e-10 },
+ { 30.1103868694, -16635e-10, 686e-10 } };
+
+ static const double dlm[8][3] =
+ { { 252.25090552, 5381016286.88982, -1.92789 },
+ { 181.97980085, 2106641364.33548, 0.59381 },
+ { 100.46645683, 1295977422.83429, -2.04411 },
+ { 355.43299958, 689050774.93988, 0.94264 },
+ { 34.35151874, 109256603.77991, -30.60378 },
+ { 50.07744430, 43996098.55732, 75.61614 },
+ { 314.05500511, 15424811.93933, -1.75083 },
+ { 304.34866548, 7865503.20744, 0.21103 } };
+
+ static const double e[8][3] =
+ { { 0.2056317526, 0.0002040653, -28349e-10 },
+ { 0.0067719164, -0.0004776521, 98127e-10 },
+ { 0.0167086342, -0.0004203654, -0.0000126734 },
+ { 0.0934006477, 0.0009048438, -80641e-10 },
+ { 0.0484979255, 0.0016322542, -0.0000471366 },
+ { 0.0555481426, -0.0034664062, -0.0000643639 },
+ { 0.0463812221, -0.0002729293, 0.0000078913 },
+ { 0.0094557470, 0.0000603263, 0 } };
+
+ static const double pi[8][3] =
+ { { 77.45611904, 5719.11590, -4.83016 },
+ { 131.56370300, 175.48640, -498.48184 },
+ { 102.93734808, 11612.35290, 53.27577 },
+ { 336.06023395, 15980.45908, -62.32800 },
+ { 14.33120687, 7758.75163, 259.95938 },
+ { 93.05723748, 20395.49439, 190.25952 },
+ { 173.00529106, 3215.56238, -34.09288 },
+ { 48.12027554, 1050.71912, 27.39717 } };
+
+ static const double dinc[8][3] =
+ { { 7.00498625, -214.25629, 0.28977 },
+ { 3.39466189, -30.84437, -11.67836 },
+ { 0, 469.97289, -3.35053 },
+ { 1.84972648, -293.31722, -8.11830 },
+ { 1.30326698, -71.55890, 11.95297 },
+ { 2.48887878, 91.85195, -17.66225 },
+ { 0.77319689, -60.72723, 1.25759 },
+ { 1.76995259, 8.12333, 0.08135 } };
+
+ static const double omega[8][3] =
+ { { 48.33089304, -4515.21727, -31.79892 },
+ { 76.67992019, -10008.48154, -51.32614 },
+ { 174.87317577, -8679.27034, 15.34191 },
+ { 49.55809321, -10620.90088, -230.57416 },
+ { 100.46440702, 6362.03561, 326.52178 },
+ { 113.66550252, -9240.19942, -66.23743 },
+ { 74.00595701, 2669.15033, 145.93964 },
+ { 131.78405702, -221.94322, -0.78728 } };
+
+ // tables for trigonometric terms to be added to the mean elements
+ // of the semi-major axes.
+ static const double kp[8][9] =
+ { { 69613.0, 75645.0, 88306.0, 59899.0, 15746.0, 71087.0, 142173.0, 3086.0, 0.0 },
+ { 21863.0, 32794.0, 26934.0, 10931.0, 26250.0, 43725.0, 53867.0, 28939.0, 0.0 },
+ { 16002.0, 21863.0, 32004.0, 10931.0, 14529.0, 16368.0, 15318.0, 32794.0, 0.0 },
+ { 6345.0, 7818.0, 15636.0, 7077.0, 8184.0, 14163.0, 1107.0, 4872.0, 0.0 },
+ { 1760.0, 1454.0, 1167.0, 880.0, 287.0, 2640.0, 19.0, 2047.0, 1454.0 },
+ { 574.0, 0.0, 880.0, 287.0, 19.0, 1760.0, 1167.0, 306.0, 574.0 },
+ { 204.0, 0.0, 177.0, 1265.0, 4.0, 385.0, 200.0, 208.0, 204.0 },
+ { 0.0, 102.0, 106.0, 4.0, 98.0, 1367.0, 487.0, 204.0, 0.0 } };
+
+ static const double ca[8][9] =
+ { { 4.0, -13.0, 11.0, -9.0, -9.0, -3.0, -1.0, 4.0, 0.0 },
+ { -156.0, 59.0, -42.0, 6.0, 19.0, -20.0, -10.0, -12.0, 0.0 },
+ { 64.0, -152.0, 62.0, -8.0, 32.0, -41.0, 19.0, -11.0, 0.0 },
+ { 124.0, 621.0, -145.0, 208.0, 54.0, -57.0, 30.0, 15.0, 0.0 },
+ { -23437.0, -2634.0, 6601.0, 6259.0, -1507.0, -1821.0, 2620.0, -2115.0,-1489.0 },
+ { 62911.0,-119919.0, 79336.0, 17814.0,-24241.0, 12068.0, 8306.0, -4893.0, 8902.0 },
+ { 389061.0,-262125.0,-44088.0, 8387.0,-22976.0, -2093.0, -615.0, -9720.0, 6633.0 },
+ { -412235.0,-157046.0,-31430.0, 37817.0, -9740.0, -13.0, -7449.0, 9644.0, 0.0 } };
+
+ static const double sa[8][9] =
+ { { -29.0, -1.0, 9.0, 6.0, -6.0, 5.0, 4.0, 0.0, 0.0 },
+ { -48.0, -125.0, -26.0, -37.0, 18.0, -13.0, -20.0, -2.0, 0.0 },
+ { -150.0, -46.0, 68.0, 54.0, 14.0, 24.0, -28.0, 22.0, 0.0 },
+ { -621.0, 532.0, -694.0, -20.0, 192.0, -94.0, 71.0, -73.0, 0.0 },
+ { -14614.0,-19828.0, -5869.0, 1881.0, -4372.0, -2255.0, 782.0, 930.0, 913.0 },
+ { 139737.0, 0.0, 24667.0, 51123.0, -5102.0, 7429.0, -4095.0, -1976.0,-9566.0 },
+ { -138081.0, 0.0, 37205.0,-49039.0,-41901.0,-33872.0,-27037.0,-12474.0,18797.0 },
+ { 0.0, 28492.0,133236.0, 69654.0, 52322.0,-49577.0,-26430.0, -3593.0, 0.0 } };
+
+ // tables giving the trigonometric terms to be added to the mean
+ // elements of the mean longitudes.
+ static const double kq[8][10] =
+ { { 3086.0, 15746.0, 69613.0, 59899.0, 75645.0, 88306.0, 12661.0, 2658.0, 0.0, 0.0 },
+ { 21863.0, 32794.0, 10931.0, 73.0, 4387.0, 26934.0, 1473.0, 2157.0, 0.0, 0.0 },
+ { 10.0, 16002.0, 21863.0, 10931.0, 1473.0, 32004.0, 4387.0, 73.0, 0.0, 0.0 },
+ { 10.0, 6345.0, 7818.0, 1107.0, 15636.0, 7077.0, 8184.0, 532.0, 10.0, 0.0 },
+ { 19.0, 1760.0, 1454.0, 287.0, 1167.0, 880.0, 574.0, 2640.0, 19.0,1454.0 },
+ { 19.0, 574.0, 287.0, 306.0, 1760.0, 12.0, 31.0, 38.0, 19.0, 574.0 },
+ { 4.0, 204.0, 177.0, 8.0, 31.0, 200.0, 1265.0, 102.0, 4.0, 204.0 },
+ { 4.0, 102.0, 106.0, 8.0, 98.0, 1367.0, 487.0, 204.0, 4.0, 102.0 } };
+
+ static const double cl[8][10] =
+ { { 21.0, -95.0, -157.0, 41.0, -5.0, 42.0, 23.0, 30.0, 0.0, 0.0 },
+ { -160.0, -313.0, -235.0, 60.0, -74.0, -76.0, -27.0, 34.0, 0.0, 0.0 },
+ { -325.0, -322.0, -79.0, 232.0, -52.0, 97.0, 55.0, -41.0, 0.0, 0.0 },
+ { 2268.0, -979.0, 802.0, 602.0, -668.0, -33.0, 345.0, 201.0, -55.0, 0.0 },
+ { 7610.0, -4997.0,-7689.0,-5841.0,-2617.0, 1115.0, -748.0, -607.0, 6074.0, 354.0 },
+ { -18549.0, 30125.0,20012.0, -730.0, 824.0, 23.0, 1289.0, -352.0,-14767.0,-2062.0 },
+ { -135245.0,-14594.0, 4197.0,-4030.0,-5630.0,-2898.0, 2540.0, -306.0, 2939.0, 1986.0 },
+ { 89948.0, 2103.0, 8963.0, 2695.0, 3682.0, 1648.0, 866.0, -154.0, -1963.0, -283.0 } };
+
+ static const double sl[8][10] =
+ { { -342.0, 136.0, -23.0, 62.0, 66.0, -52.0, -33.0, 17.0, 0.0, 0.0 },
+ { 524.0, -149.0, -35.0, 117.0, 151.0, 122.0, -71.0, -62.0, 0.0, 0.0 },
+ { -105.0, -137.0, 258.0, 35.0, -116.0, -88.0, -112.0, -80.0, 0.0, 0.0 },
+ { 854.0, -205.0, -936.0, -240.0, 140.0, -341.0, -97.0, -232.0, 536.0, 0.0 },
+ { -56980.0, 8016.0, 1012.0, 1448.0,-3024.0,-3710.0, 318.0, 503.0, 3767.0, 577.0 },
+ { 138606.0,-13478.0,-4964.0, 1441.0,-1319.0,-1482.0, 427.0, 1236.0, -9167.0,-1918.0 },
+ { 71234.0,-41116.0, 5334.0,-4935.0,-1848.0, 66.0, 434.0,-1748.0, 3780.0, -701.0 },
+ { -47645.0, 11647.0, 2166.0, 3194.0, 679.0, 0.0, -244.0, -419.0, -2531.0, 48.0 } };
+
+ //---------------------------------------------------------------------------
+ // Normalize angle into the range -pi <= A < +pi.
+ double anpm (double a)
+ {
+ double w = fmod(a,TWOPI);
+
+ if (fabs(w) >= PI)
+ w = w - ((a < 0) ? -TWOPI : TWOPI);
+
+ return w;
+ }
+
+ //---------------------------------------------------------------------------
+ // The reference frame is equatorial and is with respect to the
+ // mean equator and equinox of epoch j2000.
+ void planetpv (double epoch[2], int np, double pv[2][3])
+ {
+ // working storage
+ int i, j, k;
+ double t, da, dl, de, dp, di, doh, dmu, arga, argl, am;
+ double ae, dae, ae2, at, r, v, si2, xq, xp, tl, xsw;
+ double xcw, xm2, xf, ci2, xms, xmc, xpxq2, x, y, z;
+
+ // time: julian millennia since j2000.
+ t = ((epoch[0] - J2000) + epoch[1]) / JMILLENIA;
+
+ // compute the mean elements.
+ da = a[np][0] + (a[np][1] + a[np][2] * t ) * t;
+ dl = (3600.0 * dlm[np][0] + (dlm[np][1] + dlm[np][2] * t ) * t ) * A2R;
+ de = e[np][0] + (e[np][1] + e[np][2] * t ) * t;
+ dp = anpm((3600.0 * pi[np][0] + (pi[np][1] + pi[np][2] * t ) * t ) * A2R );
+ di = (3600.0 * dinc[np][0] + (dinc[np][1] + dinc[np][2] * t ) * t ) * A2R;
+ doh = anpm((3600.0 * omega[np][0] + (omega[np][1] + omega[np][2] * t ) * t ) * A2R );
+
+ // apply the trigonometric terms.
+ dmu = 0.35953620 * t;
+
+ for (k = 0; k < 8; ++k)
+ {
+ arga = kp[np][k] * dmu;
+ argl = kq[np][k] * dmu;
+ da = da + (ca[np][k] * cos(arga) + sa[np][k] * sin(arga)) * 0.0000001;
+ dl = dl + (cl[np][k] * cos(argl) + sl[np][k] * sin(argl)) * 0.0000001;
+ }
+
+ arga = kp[np][8] * dmu;
+ da = da + t * (ca[np][8] * cos(arga) + sa[np][8] * sin(arga)) * 0.0000001;
+
+ for (k = 8; k <= 9; ++k)
+ {
+ argl = kq[np][k] * dmu;
+ dl = dl + t * ( cl[np][k] * cos(argl) + sl[np][k] * sin(argl) ) * 0.0000001;
+ }
+
+ dl = fmod(dl,TWOPI);
+
+ // iterative solution of kepler's equation to get eccentric anomaly.
+ am = dl - dp;
+ ae = am + de * sin(am);
+ k = 0;
+
+ while (1)
+ {
+ dae = (am - ae + de * sin(ae)) / (1.0 - de * cos(ae));
+ ae = ae + dae;
+ k = k + 1;
+
+ if ((k >= 10) || (fabs(dae) < 1e-12))
+ break;
+ }
+
+ // true anomaly.
+ ae2 = ae / 2.0;
+ at = 2.0 * atan2(sqrt((1.0 + de)/(1.0 - de)) * sin(ae2), cos(ae2));
+
+ // distance (au) and speed (radians per day).
+ r = da * (1.0 - de * cos(ae));
+ v = GAUSSK * sqrt((1.0 + 1.0 / amas[np] ) / (da * da * da));
+
+ si2 = sin(di / 2.0);
+ xq = si2 * cos(doh);
+ xp = si2 * sin(doh);
+ tl = at + dp;
+ xsw = sin(tl);
+ xcw = cos(tl);
+ xm2 = 2.0 * (xp * xcw - xq * xsw );
+ xf = da / sqrt(1.0 - de*de);
+ ci2 = cos(di / 2.0);
+ xms = (de * sin(dp) + xsw) * xf;
+ xmc = (de * cos(dp) + xcw) * xf;
+ xpxq2 = 2.0 * xp * xq;
+
+ // position (j2000 ecliptic x,y,z in au).
+ x = r * (xcw - xm2 * xp);
+ y = r * (xsw + xm2 * xq);
+ z = r * (-xm2 * ci2);
+
+ // rotate to equatorial.
+ pv[0][0] = x;
+ pv[0][1] = y * coseps - z * sineps;
+ pv[0][2] = y * sineps + z * coseps;
+
+ // velocity (j2000 ecliptic xdot,ydot,zdot in au/d).
+ x = v * ((-1.0 + 2.0 * xp * xp) * xms + xpxq2 * xmc);
+ y = v * (( 1.0 - 2.0 * xq * xq ) * xmc - xpxq2 * xms);
+ z = v * (2.0 * ci2 * (xp * xms + xq * xmc));
+
+ // rotate to equatorial.
+ pv[1][0] = x;
+ pv[1][1] = y * coseps - z * sineps;
+ pv[1][2] = y * sineps + z * coseps;
+ }
+
+ //---------------------------------------------------------------------------
+ // Computes RA, Declination, and distance from a state vector returned by
+ // planetpv.
+ void radecdist(double state[2][3], double rdd[3])
+ {
+ // distance
+ rdd[2] = sqrt(state[0][0] * state[0][0] + state[0][1] * state[0][1] + state[0][2] * state[0][2]);
+
+ // RA
+ rdd[0] = atan2(state[0][1], state[0][0]) * R2H;
+ if (rdd[0] < 0.0) rdd[0] += 24.0;
+
+ // Declination
+ rdd[1] = asin(state[0][2] / rdd[2]) * R2D;
+ }
+
+ //---------------------------------------------------------------------------
+ // Entry point
+ // Calculate RA and Dec for noon on every day in 1900-2100
+ int main(int argc, char ** argv)
+ {
+ int i, n, p;
+ double jd[2];
+ double pv[2][3];
+ double position[3];
+ bool ga_testing = false;
+
+ // do we have verbose output?
+ if (argc > 1)
+ {
+ for (i = 1; i < argc; ++i)
+ {
+ if (!strcmp(argv[1],"-ga"))
+ {
+ ga_testing = true;
+ break;
+ }
+ }
+ }
+
+ // get starting time
+
+ // main loop
+ for (i = 0; i < TEST_LOOPS; ++i)
+ {
+ jd[0] = J2000;
+ jd[1] = 0.0;
+
+ for (n = 0; n < TEST_LENGTH; ++n)
+ {
+ jd[0] += 1.0;
+
+ for (p = 0; p < 8; ++p)
+ {
+ planetpv(jd,p,pv);
+ radecdist(pv,position);
+ }
+ }
+ }
+
+ // get final time
+
+ // report runtime
+
+ fflush(stdout);
+
+ return 0;
+ }
Index: llvm-test/SingleSource/Benchmarks/CoyoteBench/fftbench.cpp
diff -c /dev/null llvm-test/SingleSource/Benchmarks/CoyoteBench/fftbench.cpp:1.1
*** /dev/null Sat Mar 4 16:35:30 2006
--- llvm-test/SingleSource/Benchmarks/CoyoteBench/fftbench.cpp Sat Mar 4 16:35:18 2006
***************
*** 0 ****
--- 1,699 ----
+ /*
+ fftbench
+ Standard C++ version
+ 10 May 2003
+
+ Written by Scott Robert Ladd.
+ No rights reserved. This is public domain software, for use by anyone.
+
+ A number-crunching benchmark that can be used as a fitness test for
+ evolving optimal compiler options via genetic algorithm.
+
+ A simplified version of the FFT component from my standard libcoyote
+ library, this benchmark test FFT performance on complex<double> values.
+
+ Note that the code herein is design for the purpose of testing
+ computational performance; error handling and other such "niceties"
+ is virtually non-existent.
+
+ Actual benchmark results can be found at:
+ http://www.coyotegulch.com
+
+ Please do not use this information or algorithm in any way that might
+ upset the balance of the universe or to produce imaginary friends for
+ imaginary numbers.
+ */
+
+ #include <cmath>
+ #include <complex>
+ #include <ctime>
+ #include <cstring>
+ #include <iostream>
+ #include <iomanip>
+ #include <stdexcept>
+
+ using namespace std;
+
+ //---------------------------------------------------------------------------
+ // number of loops to perform
+ #if defined(ACOVEA)
+ #if defined(arch_pentium4)
+ static const int TEST_SIZE = 262144;
+ #else
+ static const int TEST_SIZE = 131072;
+ #endif
+ #else
+ static const int TEST_SIZE = 2097152;
+ #endif
+
+ //---------------------------------------------------------------------------
+ // embedded random number generator; ala Park and Miller
+ static double random_double()
+ {
+ static long seed = 1325;
+ static const long IA = 16807;
+ static const long IM = 2147483647;
+ static const double AM = 4.65661287525E-9;
+ static const long IQ = 127773;
+ static const long IR = 2836;
+ static const long MASK = 123459876;
+
+ long k;
+ double result;
+
+ seed ^= MASK;
+ k = seed / IQ;
+ seed = IA * (seed - k * IQ) - IR * k;
+
+ if (seed < 0)
+ seed += IM;
+
+ result = AM * seed;
+ seed ^= MASK;
+
+ return result;
+ }
+
+ //---------------------------------------------------------------------------
+ template <class T>
+ class polynomial
+ {
+ public:
+ // creation constructor, uninitialized (coefficients will be unknown values)
+ polynomial(size_t degree);
+
+ // creation constructor, with initialization from array
+ polynomial(size_t degree, const T * coefficients);
+
+ // creation constructor, with single-value initialization constructor
+ polynomial(size_t degree, const T & value);
+
+ // copy constructor
+ polynomial(const polynomial<T> & source);
+
+ // destructor
+ virtual ~polynomial();
+
+ // assignment
+ polynomial<T> & operator = (const polynomial<T> & source);
+
+ // initialize to specific value
+ void initialize(const T & value = T(0));
+
+ // increase polynomial length
+ polynomial<T> & stretch(size_t degrees);
+
+ // interrogate for degree
+ size_t degree() const
+ {
+ return m_degree;
+ }
+
+ // get specific coefficient
+ T get(size_t term) const;
+
+ T & operator [] (size_t term);
+
+ // evaluate for a specific value
+ T operator() (const T & x) const;
+
+ // unitary operators
+ polynomial<T> operator - () const;
+ polynomial<T> operator + () const;
+
+ // binary mathematical operators
+ polynomial<T> operator + (const polynomial<T> & poly) const;
+ polynomial<T> operator - (const polynomial<T> & poly) const;
+
+ // constant required by FFT routines
+ static const complex<T> PI2I;
+
+ // returns largest power of two that holds n
+ static size_t log2(size_t n);
+
+ // reverses a sequence of bits
+ static size_t flip_bits(size_t k, size_t bits);
+
+ // stretches the length of a polynomial to a power of two
+ size_t stretch_fft();
+
+ // performs a reverse-bit copy of a polynomial<T> into a new polynomial< complex<T> >
+ static polynomial< complex<T> > bit_reverse(const polynomial<T> & poly);
+ static polynomial< complex<T> > bit_reverse(const polynomial< complex<T> > & poly);
+
+ // Fast Fourier Transform of polynomial<T> to polynomial< complex<T> >
+ static polynomial< complex<T> > fft(const polynomial<T> & poly);
+
+ // inverse FFT of polynomial< complex<T> > to polynomial<T>
+ static polynomial< complex<T> > inverse_fft(const polynomial< complex<T> > & poly);
+
+ // multiplication via FFT
+ polynomial<T> operator * (const polynomial<T> & poly) const;
+
+ // shorthand mathematical operators
+ polynomial<T> & operator += (const polynomial<T> & poly);
+ polynomial<T> & operator -= (const polynomial<T> & poly);
+ polynomial<T> & operator *= (const polynomial<T> & poly);
+
+ protected:
+ // coefficients
+ T * m_coeff;
+
+ // number of terms
+ size_t m_degree;
+
+ // acquire resources
+ void acquire();
+
+ // release resources
+ void release();
+
+ // deep copy
+ void deep_copy(const T * source);
+ };
+
+ // acquire resources
+ template <typename T>
+ inline void polynomial<T>::acquire()
+ {
+ m_coeff = new T [m_degree];
+ }
+
+ // release resources
+ template <typename T>
+ inline void polynomial<T>::release()
+ {
+ delete [] m_coeff;
+ }
+
+ // deep copy
+ template <typename T>
+ inline void polynomial<T>::deep_copy(const T * source)
+ {
+ for (size_t n = 0; n < m_degree; ++n)
+ m_coeff[n] = source[n];
+ }
+
+ // initialize to specific value
+ template <typename T>
+ inline void polynomial<T>::initialize(const T & value)
+ {
+ for (size_t n = 0; n < m_degree; ++n)
+ m_coeff[n] = value;
+ }
+
+ // creation constructor, uninitialized (coefficients will be unknown values)
+ template <typename T>
+ polynomial<T>::polynomial(size_t degree)
+ : m_coeff(NULL),
+ m_degree(degree)
+ {
+ acquire();
+ }
+
+ // creation constructor, with possible initialization from array
+ template <typename T>
+ polynomial<T>::polynomial(size_t degree, const T * coefficients)
+ : m_coeff(NULL),
+ m_degree(degree)
+ {
+ acquire();
+
+ if (coefficients != NULL)
+ deep_copy(coefficients);
+ }
+
+ // creation constructor, with single-value initialization constructor
+ template <typename T>
+ polynomial<T>::polynomial(size_t degree, const T & value)
+ : m_coeff(NULL),
+ m_degree(degree)
+ {
+ acquire();
+ initialize(value);
+ }
+
+ // copy constructor
+ template <typename T>
+ polynomial<T>::polynomial(const polynomial<T> & source)
+ : m_coeff(NULL),
+ m_degree(source.m_degree)
+ {
+ acquire();
+ deep_copy(source.m_coeff);
+ }
+
+ // destructor
+ template <typename T>
+ polynomial<T>::~polynomial()
+ {
+ release();
+ }
+
+ // assignment
+ template <typename T>
+ polynomial<T> & polynomial<T>::operator = (const polynomial<T> & source)
+ {
+ if (m_degree != source.m_degree)
+ {
+ release();
+
+ m_degree = source.m_degree;
+ acquire();
+ }
+
+ deep_copy(source.m_coeff);
+
+ return *this;
+ }
+
+ // increase polynomial length
+ template <typename T>
+ polynomial<T> & polynomial<T>::stretch(size_t degrees)
+ {
+ if (degrees != 0)
+ {
+ T * old_coeff = m_coeff;
+ size_t old_degree = m_degree;
+
+ m_degree += degrees;
+ acquire();
+
+ size_t n = 0;
+
+ for (; n < old_degree; ++n)
+ m_coeff[n] = old_coeff[n];
+
+ for (; n < m_degree; ++n)
+ m_coeff[n] = T(0);
+ }
+
+ return *this;
+ }
+
+ // get specific coefficient
+ template <typename T>
+ T polynomial<T>::get(size_t term) const
+ {
+ return m_coeff[term];
+ }
+
+ template <typename T>
+ T & polynomial<T>::operator [] (size_t term)
+ {
+ return m_coeff[term];
+ }
+
+ // evaluate for a specific value
+ template <typename T>
+ T polynomial<T>::operator() (const T & x) const
+ {
+ // using Horner's Rule
+ T y = m_coeff[m_degree - 1];
+
+ size_t i = m_degree - 2;
+
+ while (true)
+ {
+ y = x * y + m_coeff[i];
+
+ if (i > 0)
+ --i;
+ else
+ break;
+ }
+
+ return y;
+ }
+
+ // unitary operators
+ template <typename T>
+ polynomial<T> polynomial<T>::operator - () const
+ {
+ polynomial<T> result(m_degree); // uninitialized
+
+ for (size_t n = 0; n < m_degree; ++n)
+ result.m_coeff[n] = -m_coeff[n];
+
+ return result;
+ }
+
+ template <typename T>
+ polynomial<T> polynomial<T>::operator + () const
+ {
+ return polynomial<T>(*this);
+ }
+
+ // binary mathematical operators
+ template <typename T>
+ polynomial<T> polynomial<T>::operator + (const polynomial<T> & poly) const
+ {
+ if (m_degree >= poly.m_degree)
+ {
+ polynomial<T> result(*this);
+
+ for (size_t n = 0; n < m_degree; ++n)
+ result.m_coeff[n] += poly.m_coeff[n];
+
+ return result;
+ }
+ else
+ {
+ polynomial<T> result(poly);
+
+ for (size_t n = 0; n < m_degree; ++n)
+ result.m_coeff[n] += m_coeff[n];
+
+ return result;
+ }
+ }
+
+ template <typename T>
+ polynomial<T> polynomial<T>::operator - (const polynomial<T> & poly) const
+ {
+ if (m_degree >= poly.m_degree)
+ {
+ polynomial<T> result(*this);
+
+ for (size_t n = 0; n < m_degree; ++n)
+ result.m_coeff[n] -= poly.m_coeff[n];
+
+ return result;
+ }
+ else
+ {
+ polynomial<T> result(poly);
+
+ for (size_t n = 0; n < m_degree; ++n)
+ result.m_coeff[n] -= m_coeff[n];
+
+ return result;
+ }
+ }
+
+ // constant required by FFT routines
+ template <typename T>
+ const complex<T> polynomial<T>::PI2I(0.0,6.283185307179586);
+
+ // returns largest power of two that holds n
+ template <typename T>
+ size_t polynomial<T>::log2(size_t n)
+ {
+ // returns 1 if n == 0!
+ size_t x = 1, c = 0;
+
+ while (x < n)
+ {
+ ++c;
+ x <<= 1;
+
+ if (x == 0)
+ break;
+ }
+
+ return c;
+ }
+
+ // reverses a sequence of bits
+ template <typename T>
+ size_t polynomial<T>::flip_bits(size_t k, size_t bits)
+ {
+ size_t lm = 1 << (bits - 1);
+ size_t rm = 1;
+ size_t r = 0;
+
+ while (lm)
+ {
+ if (k & rm)
+ r |= (lm);
+
+ lm >>= 1;
+ rm <<= 1;
+ }
+
+ return r;
+ }
+
+ // stretches the length of a polynomial to a power of two
+ template <class T>
+ size_t polynomial<T>::stretch_fft()
+ {
+ size_t n = 1;
+
+ while (true)
+ {
+ if (m_degree <= n)
+ break;
+
+ n <<= 1;
+
+ if (n == 0)
+ throw overflow_error("overflow in fft polynomial stretch");
+ }
+
+ n <<= 1;
+ n -= m_degree;
+
+ if (n > 0)
+ stretch(n);
+
+ return n;
+ }
+
+ // performs a reverse-bit copy of a polynomial<T> into a new polynomial< complex<T> >
+ template <typename T>
+ polynomial< complex<T> > polynomial<T>::bit_reverse(const polynomial<T> & poly)
+ {
+ size_t b = log2(poly.degree());
+
+ polynomial< complex<T> > result(poly.degree());
+
+ for (size_t n = 0; n < poly.degree(); ++n)
+ result[flip_bits(n,b)] = poly.get(n);
+
+ return result;
+ }
+
+ // performs a reverse-bit copy of a polynomial<T> into a new polynomial< complex<T> >
+ template <typename T>
+ polynomial< complex<T> > polynomial<T>::bit_reverse(const polynomial< complex<T> > & poly)
+ {
+ size_t b = log2(poly.degree());
+
+ polynomial< complex<T> > result(poly.degree());
+
+ for (size_t n = 0; n < poly.degree(); ++n)
+ result[flip_bits(n,b)] = poly.get(n);
+
+ return result;
+ }
+
+ // Fast Fourier Transform of polynomial<T> to polynomial< complex<T> >
+ template <typename T>
+ polynomial< complex<T> > polynomial<T>::fft(const polynomial<T> & poly)
+ {
+ size_t nl = log2(poly.degree());
+ size_t j, k, m, m2, s;
+ complex<T> wm, w, t, u;
+
+ polynomial< complex<T> > result = bit_reverse(poly);
+
+ m = 2;
+ m2 = 1;
+
+ for (s = 0; s < nl; ++s)
+ {
+ wm = exp(PI2I / complex<T>(m));
+ w = 1.0;
+
+ for (j = 0; j <= (m2 - 1); ++j)
+ {
+ for (k = j; k <= poly.degree() - 1; k += m)
+ {
+ t = w * result[k + m2];
+ u = result[k];
+ result[k] = u + t;
+ result[k + m2] = u - t;
+ }
+
+ w *= wm;
+ }
+
+ m <<= 1;
+ m2 <<= 1;
+ }
+
+ return result;
+ }
+
+ // inverse FFT of polynomial< complex<T> > to polynomial<T>
+ template <typename T>
+ polynomial< complex<T> > polynomial<T>::inverse_fft(const polynomial< complex<T> > & poly)
+ {
+ size_t nl = log2(poly.degree());
+ size_t j, k, m, m2, s;
+ complex<T> wm, w, t, u;
+
+ polynomial< complex<T> > result = bit_reverse(poly);
+
+ m = 2;
+ m2 = 1;
+
+ for (s = 0; s < nl; ++s)
+ {
+ wm = exp(-PI2I / complex<T>(m));
+ w = 1.0;
+
+ for (j = 0; j <= (m2 - 1); ++j)
+ {
+ for (k = j; k <= poly.degree() - 1; k += m)
+ {
+ t = w * result[k + m2];
+ u = result[k];
+ result[k] = u + t;
+ result[k + m2] = u - t;
+ }
+
+ w *= wm;
+ }
+
+ m <<= 1;
+ m2 <<= 1;
+ }
+
+ for (j = 0; j < poly.degree(); ++j)
+ result[j] /= double(poly.degree());
+
+ return result;
+ }
+
+ // multiplication by FFT
+ template <typename T>
+ polynomial<T> polynomial<T>::operator * (const polynomial<T> & poly) const
+ {
+ #if defined(BRUTE_FORCE)
+ // brute force algorithm
+ if (m_degree != poly.m_degree)
+ throw domain_error("can not multiply two polynomials with different lengths");
+
+ polynomial<T> result(2 * m_degree - 1, T(0));
+
+ for (size_t n1 = 0; n1 < m_degree; ++n1)
+ for (size_t n2 = 0; n2 < m_degree; ++n2)
+ result.m_coeff[n1 + n2] += m_coeff[n1] * poly.m_coeff[n2];
+ #else
+ // duplicate p1 and p2 to preserve original objects
+ polynomial<T> a1(*this);
+ polynomial<T> a2(poly);
+
+ // expand polynomials to next-largest power of two
+ if (a1.degree() > a2.degree())
+ a2.stretch(a1.stretch_fft());
+ else
+ a1.stretch(a2.stretch_fft());
+
+ // FFT polynomials
+ polynomial< complex<T> > dft1 = fft(a1);
+ polynomial< complex<T> > dft2 = fft(a2);
+
+ // multiply coefficients
+ size_t n2 = a1.degree();
+
+ for (size_t k = 0; k < n2; ++k)
+ dft1[k] *= dft2[k];
+
+ // inverse DFT to obtain result
+ dft2 = inverse_fft(dft1);
+
+ // convert back to complex<T>
+ --n2;
+ polynomial<T> result(n2);
+
+ for (size_t k = 0; k < n2; ++k)
+ result[k] = dft2[k].real();
+ #endif
+
+ return result;
+ }
+
+ // shorthand mathematical operators
+ template <typename T>
+ polynomial<T> & polynomial<T>::operator += (const polynomial<T> & poly)
+ {
+ size_t length = (m_degree <= poly.m_degree) ? m_degree : poly.m_degree;
+
+ for (size_t n = 0; n < length; ++n)
+ m_coeff[n] += poly.m_coeff[n];
+
+ return *this;
+ }
+
+ template <typename T>
+ polynomial<T> & polynomial<T>::operator -= (const polynomial<T> & poly)
+ {
+ size_t length = (m_degree <= poly.m_degree) ? m_degree : poly.m_degree;
+
+ for (size_t n = 0; n < length; ++n)
+ m_coeff[n] -= poly.m_coeff[n];
+
+ return *this;
+ }
+
+ template <typename T>
+ polynomial<T> & polynomial<T>::operator *= (const polynomial<T> & poly)
+ {
+ *this = (*this) * poly;
+ }
+
+ //---------------------------------------------------------------------------
+ // Entry point
+ int main(int argc, char ** argv)
+ {
+ // are we testing using a genetic algorithm?
+ bool ga_testing = false;
+
+ if (argc > 1)
+ {
+ for (int i = 1; i < argc; ++i)
+ {
+ if (!strcmp(argv[1],"-ga"))
+ {
+ ga_testing = true;
+ break;
+ }
+ }
+ }
+
+ // generate random polynomials
+ polynomial<double> poly1(TEST_SIZE);
+ polynomial<double> poly2(TEST_SIZE);
+ polynomial<double> poly3(TEST_SIZE * 2 - 1);
+
+ for (int n = 0; n < TEST_SIZE; ++n)
+ {
+ poly1[n] = random_double();
+ poly2[n] = random_double();
+ }
+
+ // get starting time
+ //struct timespec start, stop;
+ //clock_gettime(CLOCK_REALTIME,&start);
+
+ // what we're timing
+ poly3 = poly1 * poly2;
+
+ // get final time
+ //clock_gettime(CLOCK_REALTIME,&stop);
+ double run_time = 0;
+ //(stop.tv_sec - start.tv_sec) + (double)(stop.tv_nsec - start.tv_nsec) / 1000000000.0;
+
+ // report runtime
+ if (ga_testing)
+ cout << run_time;
+ else
+ cout << "\nfftbench (Std. C++) run time: " << run_time << "\n\n";
+
+ cout.flush();
+
+ return 0;
+ }
Index: llvm-test/SingleSource/Benchmarks/CoyoteBench/huffbench.c
diff -c /dev/null llvm-test/SingleSource/Benchmarks/CoyoteBench/huffbench.c:1.1
*** /dev/null Sat Mar 4 16:35:30 2006
--- llvm-test/SingleSource/Benchmarks/CoyoteBench/huffbench.c Sat Mar 4 16:35:18 2006
***************
*** 0 ****
--- 1,488 ----
+ /*
+ huffbench
+ Standard C version
+ 17 April 2003
+
+ Written by Scott Robert Ladd (scott at coyotegulch.com)
+ No rights reserved. This is public domain software, for use by anyone.
+
+ A data compression benchmark that can also be used as a fitness test
+ for evolving optimal compiler options via genetic algorithm.
+
+ This program implements the Huffman compression algorithm. The code
+ is not the tightest or fastest possible C code; rather, it is a
+ relatively straight-forward implementation of the algorithm,
+ providing opportunities for an optimizer to "do its thing."
+
+ Note that the code herein is design for the purpose of testing
+ computational performance; error handling and other such "niceties"
+ is virtually non-existent.
+
+ Actual benchmark results can be found at:
+ http://www.coyotegulch.com
+
+ Please do not use this information or algorithm in any way that might
+ upset the balance of the universe or otherwise cause the creation of
+ singularities.
+ */
+
+ #include <time.h>
+ #include <string.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <stddef.h>
+ #include <stdbool.h>
+ #include <memory.h>
+ #include <math.h>
+
+ // embedded random number generator; ala Park and Miller
+ static long seed = 1325;
+ static const long IA = 16807;
+ static const long IM = 2147483647;
+ static const long IQ = 127773;
+ static const long IR = 2836;
+ static const long MASK = 123459876;
+
+ // return index between 0 and 3
+ static size_t random4()
+ {
+ long k;
+ size_t result;
+
+ seed ^= MASK;
+ k = seed / IQ;
+ seed = IA * (seed - k * IQ) - IR * k;
+
+ if (seed < 0L)
+ seed += IM;
+
+ result = (size_t)(seed % 32L);
+ seed ^= MASK;
+
+ return result;
+ }
+
+ #if defined(ACOVEA)
+ #if defined(arch_pentium4)
+ static const int NUM_LOOPS = 5;
+ static const int TEST_SIZE = 10000000;
+ #else
+ static const int NUM_LOOPS = 2;
+ static const int TEST_SIZE = 5000000;
+ #endif
+ #else
+ static const int NUM_LOOPS = 30;
+ static const int TEST_SIZE = 10000000;
+ #endif
+
+ typedef unsigned long bits32;
+ typedef unsigned char byte;
+
+ // compressed (encoded) data
+
+ byte * generate_test_data(size_t n)
+ {
+ char * codes = "ABCDEFGHIJKLMNOPQRSTUVWXYZ012345";
+
+ byte * result = (byte *)malloc(n);
+ byte * ptr = result;
+
+ int i;
+ for (i = 0; i < n; ++i)
+ {
+ *ptr = (byte)codes[random4()];
+ ++ptr;
+ }
+
+ return result;
+ }
+
+ // utility function for processing compression trie
+ static void heap_adjust(size_t * freq, size_t * heap, int n, int k)
+ {
+ // this function compares the values in the array
+ // 'freq' to order the elements of 'heap' according
+ // in an inverse heap. See the chapter on priority
+ // queues and heaps for more explanation.
+ int j;
+
+ --heap;
+
+ int v = heap[k];
+
+ while (k <= (n / 2))
+ {
+ j = k + k;
+
+ if ((j < n) && (freq[heap[j]] > freq[heap[j+1]]))
+ ++j;
+
+ if (freq[v] < freq[heap[j]])
+ break;
+
+ heap[k] = heap[j];
+ k = j;
+ }
+
+ heap[k] = v;
+ }
+
+ // Huffman compression/decompression function
+ void compdecomp(byte * data, size_t data_len)
+ {
+ size_t i, j, n, mask;
+ bits32 k, t;
+ byte c;
+ byte * cptr;
+ byte * dptr = data;
+
+ /*
+ COMPRESSION
+ */
+
+ // allocate data space
+ byte * comp = (byte *)malloc(data_len + 1);
+
+ size_t freq[512]; // allocate frequency table
+ size_t heap[256]; // allocate heap
+ int link[512]; // allocate link array
+ bits32 code[256]; // huffman codes
+ byte clen[256]; // bit lengths of codes
+
+ memset(comp,0,sizeof(byte) * (data_len + 1));
+ memset(freq,0,sizeof(size_t) * 512);
+ memset(heap,0,sizeof(size_t) * 256);
+ memset(link,0,sizeof(int) * 512);
+ memset(code,0,sizeof(bits32) * 256);
+ memset(clen,0,sizeof(byte) * 256);
+
+ // count frequencies
+ for (i = 0; i < data_len; ++i)
+ {
+ ++freq[(size_t)(*dptr)];
+ ++dptr;
+ }
+
+ // create indirect heap based on frequencies
+ n = 0;
+
+ for (i = 0; i < 256; ++i)
+ {
+ if (freq[i])
+ {
+ heap[n] = i;
+ ++n;
+ }
+ }
+
+ for (i = n; i > 0; --i)
+ heap_adjust(freq,heap,n,i);
+
+ // generate a trie from heap
+ size_t temp;
+
+ // at this point, n contains the number of characters
+ // that occur in the data array
+ while (n > 1)
+ {
+ // take first item from top of heap
+ --n;
+ temp = heap[0];
+ heap[0] = heap[n];
+
+ // adjust the heap to maintain properties
+ heap_adjust(freq,heap,n,1);
+
+ // in upper half of freq array, store sums of
+ // the two smallest frequencies from the heap
+ freq[256 + n] = freq[heap[0]] + freq[temp];
+ link[temp] = 256 + n; // parent
+ link[heap[0]] = -256 - n; // left child
+ heap[0] = 256 + n; // right child
+
+ // adjust the heap again
+ heap_adjust(freq,heap,n,1);
+ }
+
+ link[256 + n] = 0;
+
+ // generate codes
+ size_t m, x, maxx = 0, maxi = 0;
+ int l;
+
+ for (m = 0; m < 256; ++m)
+ {
+ if (!freq[m]) // character does not occur
+ {
+ code[m] = 0;
+ clen[m] = 0;
+ }
+ else
+ {
+ i = 0; // length of current code
+ j = 1; // bit being set in code
+ x = 0; // code being built
+ l = link[m]; // link in trie
+
+ while (l) // while not at end of trie
+ {
+ if (l < 0) // left link (negative)
+ {
+ x += j; // insert 1 into code
+ l = -l; // reverse sign
+ }
+
+ l = link[l]; // move to next link
+ j <<= 1; // next bit to be set
+ ++i; // increment code length
+ }
+
+ code[m] = (unsigned long)x; // save code
+ clen[m] = (unsigned char)i; // save code len
+
+ // keep track of biggest key
+ if (x > maxx)
+ maxx = x;
+
+ // keep track of longest key
+ if (i > maxi)
+ maxi = i;
+ }
+ }
+
+ // make sure longest codes fit in unsigned long-bits
+ if (maxi > (sizeof(unsigned long) * 8))
+ {
+ fprintf(stderr,"error: bit code overflow\n");
+ exit(1);
+ }
+
+ // encode data
+ size_t comp_len = 0; // number of data_len output
+ char bout = 0; // byte of encoded data
+ int bit = -1; // count of bits stored in bout
+ dptr = data;
+
+ // watch for one-value file!
+ if (maxx == 0)
+ {
+ fprintf(stderr,"error: file has only one value!\n");
+ exit(1);
+ }
+
+ for (j = 0; j < data_len; ++j)
+ {
+ // start copying at first bit of code
+ mask = 1 << (clen[(*dptr)] - 1);
+
+ // copy code bits
+ for (i = 0; i < clen[(*dptr)]; ++i)
+ {
+ if (bit == 7)
+ {
+ // store full output byte
+ comp[comp_len] = bout;
+ ++comp_len;
+
+ // check for output longer than input!
+ if (comp_len == data_len)
+ {
+ fprintf(stderr,"error: no compression\n");
+ exit(1);
+ }
+
+ bit = 0;
+ bout = 0;
+ }
+ else
+ {
+ // move to next bit
+ ++bit;
+ bout <<= 1;
+ }
+
+ if (code[(*dptr)] & mask)
+ bout |= 1;
+
+ mask >>= 1;
+ }
+
+ ++dptr;
+ }
+
+ // output any incomplete data_len and bits
+ bout <<= (7 - bit);
+ comp[comp_len] = bout;
+ ++comp_len;
+
+ // printf("data len = %u\n",data_len);
+ // printf("comp len = %u\n",comp_len);
+
+ /*
+ DECOMPRESSION
+ */
+
+ // allocate heap2
+ bits32 heap2[256];
+
+ // allocate output character buffer
+ char outc[256];
+
+ // initialize work areas
+ memset(heap2,0,256 * sizeof(bits32));
+
+ // create decode table as trie heap2
+ char * optr = outc;
+
+ for (j = 0; j < 256; ++j)
+ {
+ (*optr) = (char)j;
+ ++optr;
+
+ // if code exists for this byte
+ if (code[j] | clen[j])
+ {
+ // begin at first code bit
+ k = 0;
+ mask = 1 << (clen[j] - 1);
+
+ // find proper node, using bits in
+ // code as path.
+ for (i = 0; i < clen[j]; ++i)
+ {
+ k = k * 2 + 1; // right link
+
+ if (code[j] & mask)
+ ++k; // go left
+
+ mask >>= 1; // next bit
+ }
+
+ heap2[j] = k; // store link in heap2
+ }
+ }
+
+ // sort outc based on heap2
+ for (i = 1; i < 256; ++i)
+ {
+ t = heap2[i];
+ c = outc[i];
+ j = i;
+
+ while ((j) && (heap2[j-1] > t))
+ {
+ heap2[j] = heap2[j - 1];
+ outc[j] = outc[j - 1];
+ --j;
+ }
+
+ heap2[j] = t;
+ outc[j] = c;
+ }
+
+ // find first character in table
+ for (j = 0; heap2[j] == 0; ++j) ;
+
+ // decode data
+ k = 0; // link in trie
+ i = j;
+ mask = 0x80;
+ n = 0;
+ cptr = comp;
+ dptr = data;
+
+ while (n < data_len)
+ {
+ k = k * 2 + 1; // right link
+
+ if ((*cptr) & mask)
+ ++k; // left link if bit on
+
+ // search heap2 until link >= k
+ while (heap2[i] < k)
+ ++i;
+
+ // code matches, character found
+ if (k == heap2[i])
+ {
+ (*dptr) = outc[i];
+ ++dptr;
+ ++n;
+ k = 0;
+ i = j;
+ }
+
+ // move to next bit
+ if (mask > 1)
+ mask >>= 1;
+ else // code extends into next byte
+ {
+ mask = 0x80;
+ ++cptr;
+ }
+ }
+
+ // remove work areas
+ free(comp);
+ }
+
+ int main(int argc, char ** argv)
+ {
+ int i;
+
+ // do we have verbose output?
+ bool ga_testing = false;
+
+ if (argc > 1)
+ {
+ for (i = 1; i < argc; ++i)
+ {
+ if (!strcmp(argv[1],"-ga"))
+ {
+ ga_testing = true;
+ break;
+ }
+ }
+ }
+
+ // initialization
+ byte * test_data = generate_test_data(TEST_SIZE);
+
+ /*
+ FILE * before = fopen("before","wb");
+ fwrite(test_data,1,TEST_SIZE,before);
+ fclose(before);
+ */
+
+ // get starting time
+ //struct timespec start, stop;
+ //clock_gettime(CLOCK_REALTIME,&start);
+
+ // what we're timing
+ for (i = 0; i < NUM_LOOPS; ++i)
+ compdecomp(test_data,TEST_SIZE);
+
+ // calculate run time
+ //clock_gettime(CLOCK_REALTIME,&stop);
+ double run_time = 0; //(stop.tv_sec - start.tv_sec) + (double)(stop.tv_nsec - start.tv_nsec) / 1000000000.0;
+
+ /*
+ FILE * after = fopen("after","wb");
+ fwrite(test_data,1,TEST_SIZE,after);
+ fclose(after);
+ */
+
+ // release resources
+ free(test_data);
+
+ // report runtime
+ if (ga_testing)
+ fprintf(stdout,"%f",run_time);
+ else
+ fprintf(stdout,"\nhuffbench (Std. C) run time: %f\n\n",run_time);
+
+ fflush(stdout);
+
+ // done
+ return 0;
+ }
Index: llvm-test/SingleSource/Benchmarks/CoyoteBench/lpbench.c
diff -c /dev/null llvm-test/SingleSource/Benchmarks/CoyoteBench/lpbench.c:1.1
*** /dev/null Sat Mar 4 16:35:30 2006
--- llvm-test/SingleSource/Benchmarks/CoyoteBench/lpbench.c Sat Mar 4 16:35:18 2006
***************
*** 0 ****
--- 1,408 ----
+ /*
+ lpbench
+ Standard C version
+ 17 April 2003
+
+ Written by Scott Robert Ladd (scott at coyotegulch.com)
+ No rights reserved. This is public domain software, for use by anyone.
+
+ A number-crunching benchmark that can be used as a fitness test for
+ evolving optimal compiler options via genetic algorithm.
+
+ This program is a modernized version of the classic Linpack
+ benchmark. My implementation is based on Bonnie Toy's translation
+ of the original Fortran source code, including modifications by Jack
+ Dongarra and Roy Longbottom.
+
+ I've updated the code as follows:
+
+ 1) ANSI C code
+ 2) A timing mechanism based on the POSIX clock_gettime() function
+ 3) Replaces C's rather weak random number generator
+ 4) Compute results for a 2500x2500 (6.25 million element) matrix
+
+ Note that the code herein is design for the purpose of testing
+ computational performance; error handling and other such "niceties"
+ is virtually non-existent.
+
+ Actual benchmark results can be found at:
+ http://www.coyotegulch.com
+
+ Please do not use this information or algorithm in any way that might
+ upset the balance of the universe or otherwise cause a disturbance in
+ the space-time continuum.
+ */
+
+ #include <time.h>
+ #include <string.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <math.h>
+ #include <stdbool.h>
+
+ // embedded random number generator; ala Park and Miller
+ static long seed = 1325;
+ static const long IA = 16807;
+ static const long IM = 2147483647;
+ static const double AM = 4.65661287525E-10;
+ static const long IQ = 127773;
+ static const long IR = 2836;
+ static const long MASK = 123459876;
+
+ static double random_double()
+ {
+ long k;
+ double result;
+
+ seed ^= MASK;
+ k = seed / IQ;
+ seed = IA * (seed - k * IQ) - IR * k;
+
+ if (seed < 0)
+ seed += IM;
+
+ result = AM * seed;
+ seed ^= MASK;
+
+ return result;
+ }
+
+ #if defined(ACOVEA)
+ #if defined(arch_pentium4)
+ static const int N = 1000;
+ static const int NM1 = 999; // N - 1
+ static const int NP1 = 1001; // N + 1
+ #else
+ static const int N = 600;
+ static const int NM1 = 599; // N - 1
+ static const int NP1 = 601; // N + 1
+ #endif
+ #else
+ static const int N = 2000;
+ static const int NM1 = 1999; // N - 1
+ static const int NP1 = 2001; // N + 1
+ #endif
+
+ // benchmark code
+ void matgen (double ** a, double * b)
+ {
+ // fill arrays
+ int i, j;
+
+ for (i = 0; i < N; i++)
+ for (j = 0; j < N; j++)
+ a[j][i] = random_double();
+
+ for (i = 0; i < N; i++)
+ b[i] = 0.0;
+
+ for (j = 0; j < N; j++)
+ for (i = 0; i < N; i++)
+ b[i] += a[j][i];
+ }
+
+ // finds the index of element having max. absolute value.
+ int idamax(int n, double * dx, int dx_off, int incx)
+ {
+ double dmax, dtemp;
+ int i, ix, itemp = 0;
+
+ if (n < 1)
+ itemp = -1;
+ else
+ {
+ if (n ==1)
+ itemp = 0;
+ else
+ {
+ if (incx != 1)
+ {
+ // code for increment not equal to 1
+ dmax = fabs(dx[dx_off]);
+ ix = 1 + incx;
+
+ for (i = 1; i < n; i++)
+ {
+ dtemp = fabs(dx[ix + dx_off]);
+
+ if (dtemp > dmax)
+ {
+ itemp = i;
+ dmax = dtemp;
+ }
+
+ ix += incx;
+ }
+ }
+ else
+ {
+ // code for increment equal to 1
+ itemp = 0;
+ dmax = fabs(dx[dx_off]);
+
+ for (i = 1; i < n; i++)
+ {
+ dtemp = fabs(dx[i + dx_off]);
+
+ if (dtemp > dmax)
+ {
+ itemp = i;
+ dmax = dtemp;
+ }
+ }
+ }
+ }
+ }
+
+ return itemp;
+ }
+
+ // forms the dot product of two vectors.
+ static double ddot(int n, double * dx, int dx_off, int incx, double * dy, int dy_off, int incy)
+ {
+ int i;
+ double dtemp = 0.0;
+
+ if (n > 0)
+ {
+ if (incx != 1 || incy != 1)
+ {
+ // code for unequal increments or equal increments not equal to 1
+ int ix = 0;
+ int iy = 0;
+
+ if (incx < 0)
+ ix = (-n + 1) * incx;
+
+ if (incy < 0)
+ iy = (-n + 1) * incy;
+
+ for (i = 0;i < n; i++)
+ {
+ dtemp += dx[ix + dx_off] * dy[iy + dy_off];
+ ix += incx;
+ iy += incy;
+ }
+ }
+ else
+ {
+ // code for both increments equal to 1
+ for (i=0; i < n; i++)
+ dtemp += dx[i + dx_off] * dy[i + dy_off];
+ }
+ }
+
+ return(dtemp);
+ }
+
+ // scales a vector by a constant.
+ void dscal(int n, double da, double * dx, int dx_off, int incx)
+ {
+ int i;
+
+ if (n > 0)
+ {
+ if (incx != 1)
+ {
+ // code for increment not equal to 1
+ int nincx = n * incx;
+
+ for (i = 0; i < nincx; i += incx)
+ dx[i + dx_off] *= da;
+ }
+ else
+ {
+ // code for increment equal to 1
+ for (i = 0; i < n; i++)
+ dx[i + dx_off] *= da;
+ }
+ }
+ }
+
+ // constant times a vector plus a vector.
+ void daxpy(int n, double da, double * dx, int dx_off, int incx, double * dy, int dy_off, int incy)
+ {
+ int i;
+
+ if ((n > 0) && (da != 0))
+ {
+ if (incx != 1 || incy != 1)
+ {
+ // code for unequal increments or equal increments not equal to 1
+ int ix = 0;
+ int iy = 0;
+
+ if (incx < 0)
+ ix = (1 - n) * incx;
+
+ if (incy < 0)
+ iy = (1 - n) * incy;
+
+ for (i = 0; i < n; i++)
+ {
+ dy[iy + dy_off] += da * dx[ix + dx_off];
+ ix += incx;
+ iy += incy;
+ }
+
+ return;
+ }
+ else
+ {
+ // code for both increments equal to 1
+ for (i = 0; i < n; i++)
+ dy[i + dy_off] += da * dx[i + dx_off];
+ }
+ }
+ }
+
+ // Factors a double precision matrix by gaussian elimination.
+ void dgefa(double ** a, int * ipvt)
+ {
+ double temp;
+ int k, j;
+
+ for (k = 0; k < NM1; k++)
+ {
+ double * col_k = a[k];
+ int kp1 = k + 1;
+
+ // find l = pivot index
+ int l = idamax(N - k, col_k, k, 1) + k;
+ ipvt[k] = l;
+
+ // zero pivot implies this column already triangularized
+ if (col_k[l] != 0)
+ {
+ // interchange if necessary
+ if (l != k)
+ {
+ temp = col_k[l];
+ col_k[l] = col_k[k];
+ col_k[k] = temp;
+ }
+
+ // compute multipliers
+ temp = -1.0 / col_k[k];
+ dscal(N - kp1, temp, col_k, kp1, 1);
+
+ // row elimination with column indexing
+ for (j = kp1; j < N; j++)
+ {
+ double * col_j = a[j];
+ temp = col_j[l];
+
+ if (l != k)
+ {
+ col_j[l] = col_j[k];
+ col_j[k] = temp;
+ }
+
+ daxpy(N - kp1, temp, col_k, kp1, 1, col_j, kp1, 1);
+ }
+ }
+ }
+
+ ipvt[N - 1] = N - 1;
+ }
+
+ // Solves the double precision system a * x = b or trans(a) * x = b
+ // using the factors computed by dgeco or dgefa.
+ void dgesl(double ** a, int * ipvt, double * b)
+ {
+ double t;
+ int k, kb;
+
+ // solve a * x = b. first solve l*y = b
+ for (k = 0; k < NM1; k++)
+ {
+ int l = ipvt[k];
+ t = b[l];
+
+ if (l != k)
+ {
+ b[l] = b[k];
+ b[k] = t;
+ }
+
+ int kp1 = k + 1;
+ daxpy(N - kp1, t, a[k], kp1, 1, b, kp1, 1);
+ }
+
+ // now solve u*x = y
+ for (kb = 0; kb < N; kb++)
+ {
+ k = N - (kb + 1);
+ b[k] /= a[k][k];
+ t = -b[k];
+ daxpy(k, t, a[k], 0, 1, b, 0, 1);
+ }
+ }
+
+ int main(int argc, char ** argv)
+ {
+ int i;
+
+ // do we have verbose output?
+ bool ga_testing = false;
+
+ if (argc > 1)
+ {
+ for (i = 1; i < argc; ++i)
+ {
+ if (!strcmp(argv[1],"-ga"))
+ {
+ ga_testing = true;
+ break;
+ }
+ }
+ }
+
+ double ** a = (double **)malloc(sizeof(double) * N);
+
+ for (i = 0; i < N; ++i)
+ a[i] = (double *)malloc(sizeof(double) * NP1);
+
+ double * b = (double *)malloc(sizeof(double) * N);
+ double * x = (double *)malloc(sizeof(double) * N);
+ int * ipvt = (int *)malloc(sizeof(int) * N);
+
+ // calculate operations per timeInSeconds
+ double ops = (2.0 * ((double)N * N * N)) / 3.0 + 2.0 * ((double)N * N);
+
+ // generate matrix
+ matgen(a,b);
+
+ // get starting time
+ //struct timespec start, stop;
+ //clock_gettime(CLOCK_REALTIME,&start);
+
+ // what we're timing
+ dgefa(a,ipvt);
+ dgesl(a,ipvt,b);
+
+ // calculate run time
+ //clock_gettime(CLOCK_REALTIME,&stop);
+ double run_time = 0;//(stop.tv_sec - start.tv_sec) + (double)(stop.tv_nsec - start.tv_nsec) / 1000000000.0;
+
+ // clean up
+ free(ipvt);
+ free(x);
+ free(b);
+
+ for (i = 0; i < N; ++i)
+ free(a[i]);
+
+ free(a);
+
+ // report runtime
+ if (ga_testing)
+ fprintf(stdout,"%f",run_time);
+ else
+ fprintf(stdout,"\nlpbench (Std. C) run time: %f\n\n",run_time);
+
+ fflush(stdout);
+
+ // done
+ return 0;
+ }
More information about the llvm-commits
mailing list