[llvm-commits] [test-suite] r49159 - in /test-suite/trunk/SingleSource/Benchmarks/Misc: fbench.c ffbench.c

Thu Apr 3 01:57:51 PDT 2008

Author: resistor
Date: Thu Apr  3 03:57:51 2008
New Revision: 49159

URL: http://llvm.org/viewvc/llvm-project?rev=49159&view=rev
Log:
Add the fbench and ffbench benchmarks from Fourmi Lab.

Added:
    test-suite/trunk/SingleSource/Benchmarks/Misc/fbench.c
    test-suite/trunk/SingleSource/Benchmarks/Misc/ffbench.c

Added: test-suite/trunk/SingleSource/Benchmarks/Misc/fbench.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/SingleSource/Benchmarks/Misc/fbench.c?rev=49159&view=auto

==============================================================================

--- test-suite/trunk/SingleSource/Benchmarks/Misc/fbench.c (added)
+++ test-suite/trunk/SingleSource/Benchmarks/Misc/fbench.c Thu Apr  3 03:57:51 2008
@@ -0,0 +1,827 @@
+/*
+
+        John Walker's Floating Point Benchmark, derived from...
+
+	Marinchip Interactive Lens Design System
+
+				     John Walker   December 1980
+
+	By John Walker
+	   http://www.fourmilab.ch/
+
+	This  program may be used, distributed, and modified freely as
+	long as the origin information is preserved.
+
+	This  is  a  complete  optical	design	raytracing  algorithm,
+	stripped of its user interface and recast into portable C.  It
+	not only determines execution speed on an  extremely  floating
+	point	(including   trig   function)	intensive   real-world
+	application, it  checks  accuracy  on  an  algorithm  that  is
+	exquisitely  sensitive	to  errors.   The  performance of this
+	program is typically far more  sensitive  to  changes  in  the
+	efficiency  of	the  trigonometric  library  routines than the
+	average floating point program.
+
+	The benchmark may be compiled in two  modes.   If  the	symbol
+	INTRIG	is  defined,  built-in	trigonometric  and square root
+	routines will be used for all calculations.  Timings made with
+        INTRIG  defined  reflect  the  machine's  basic floating point
+	performance for the arithmetic operators.  If  INTRIG  is  not
+	defined,  the  system  library	<math.h>  functions  are used.
+        Results with INTRIG not defined reflect the  system's  library
+	performance  and/or  floating  point hardware support for trig
+	functions and square root.  Results with INTRIG defined are  a
+	good  guide  to  general  floating  point  performance,  while
+	results with INTRIG undefined indicate the performance	of  an
+	application which is math function intensive.
+
+	Special  note  regarding  errors in accuracy: this program has
+	generated numbers identical to the last digit it  formats  and
+	checks on the following machines, floating point
+	architectures, and languages:
+
+	Marinchip 9900	  QBASIC    IBM 370 double-precision (REAL * 8) format
+
+	IBM PC / XT / AT  Lattice C IEEE 64 bit, 80 bit temporaries
+			  High C    same, in line 80x87 code
+                          BASICA    "Double precision"
+			  Quick BASIC IEEE double precision, software routines
+
+	Sun 3		  C	    IEEE 64 bit, 80 bit temporaries,
+				    in-line 68881 code, in-line FPA code.
+
+        MicroVAX II       C         Vax "G" format floating point
+
+	Macintosh Plus	  MPW C     SANE floating point, IEEE 64 bit format
+				    implemented in ROM.
+
+	Inaccuracies  reported	by  this  program should be taken VERY
+	SERIOUSLY INDEED, as the program has been demonstrated	to  be
+	invariant  under  changes in floating point format, as long as
+	the format is a recognised double precision  format.   If  you
+	encounter errors, please remember that they are just as likely
+	to  be	in  the  floating  point  editing   library   or   the
+	trigonometric  libraries  as  in  the low level operator code.
+
+	The benchmark assumes that results are basically reliable, and
+	only tests the last result computed against the reference.  If
+        you're running on  a  suspect  system  you  can  compile  this
+	program  with  ACCURACY defined.  This will generate a version
+	which executes as an infinite loop, performing the  ray  trace
+	and checking the results on every pass.  All incorrect results
+	will be reported.
+
+	Representative	timings  are  given  below.   All  have   been
+	normalised as if run for 1000 iterations.
+
+  Time in seconds		   Computer, Compiler, and notes
+ Normal      INTRIG
+
+ 3466.00    4031.00	Commodore 128, 2 Mhz 8510 with software floating
+			point.	Abacus Software/Data-Becker Super-C 128,
+			version 3.00, run in fast (2 Mhz) mode.  Note:
+			the results generated by this system differed
+			from the reference results in the 8th to 10th
+			decimal place.
+
+ 3290.00		IBM PC/AT 6 Mhz, Microsoft/IBM BASICA version A3.00.
+                        Run with the "/d" switch, software floating point.
+
+ 2131.50		IBM PC/AT 6 Mhz, Lattice C version 2.14, small model.
+			This version of Lattice compiles subroutine
+			calls which either do software floating point
+			or use the 80x87.  The machine on which I ran
+			this had an 80287, but the results were so bad
+			I wonder if it was being used.
+
+ 1598.00		Macintosh Plus, MPW C, SANE Software floating point.
+
+ 1582.13		Marinchip 9900 2 Mhz, QBASIC compiler with software
+			floating point.  This was a QBASIC version of the
+			program which contained the identical algorithm.
+
+  404.00		IBM PC/AT 6 Mhz, Microsoft QuickBASIC version 2.0.
+			Software floating point.
+
+  165.15		IBM PC/AT 6 Mhz, Metaware High C version 1.3, small
+			model.	This was compiled to call subroutines for
+			floating point, and the machine contained an 80287
+			which was used by the subroutines.
+
+  143.20		Macintosh II, MPW C, SANE calls.  I was unable to
+			determine whether SANE was using the 68881 chip or
+			not.
+
+  121.80		Sun 3/160 16 Mhz, Sun C.  Compiled with -fsoft switch
+			which executes floating point in software.
+
+   78.78     110.11	IBM RT PC (Model 6150).  IBM AIX 1.0 C compiler
+			with -O switch.
+
+   75.2      254.0	Microsoft Quick C 1.0, in-line 8087 instructions,
+			compiled with 80286 optimisation on.  (Switches
+			were -Ol -FPi87-G2 -AS).  Small memory model.
+
+   69.50		IBM PC/AT 6Mhz, Borland Turbo BASIC 1.0.  Compiled
+                        in "8087 required" mode to generate in-line
+			code for the math coprocessor.
+
+   66.96		IBM PC/AT 6Mhz, Microsoft QuickBASIC 4.0.  This
+			release of QuickBASIC compiles code for the
+			80287 math coprocessor.
+
+   66.36     206.35	IBM PC/AT 6Mhz, Metaware High C version 1.3, small
+			model.	This was compiled with in-line code for the
+			80287 math coprocessor.  Trig functions still call
+			library routines.
+
+   63.07     220.43	IBM PC/AT, 6Mhz, Borland Turbo C, in-line 8087 code,
+			small model, word alignment, no stack checking,
+			8086 code mode.
+
+   17.18		Apollo DN-3000, 12 Mhz 68020 with 68881, compiled
+			with in-line code for the 68881 coprocessor.
+			According to Apollo, the library routines are chosen
+			at runtime based on coprocessor presence.  Since the
+			coprocessor was present, the library is supposed to
+			use in-line floating point code.
+
+   15.55      27.56	VAXstation II GPX.  Compiled and executed under
+			VAX/VMS C.
+
+   15.14      37.93	Macintosh II, Unix system V.  Green Hills 68020
+			Unix compiler with in-line code for the 68881
+			coprocessor (-O -ZI switches).
+
+   12.69		Sun 3/160 16 Mhz, Sun C.  Compiled with -fswitch,
+			which calls a subroutine to select the fastest
+			floating point processor.  This was using the 68881.
+
+   11.74      26.73	Compaq Deskpro 386, 16 Mhz 80386 with 16 Mhz 80387.
+			Metaware High C version 1.3, compiled with in-line
+			for the math coprocessor (but not optimised for the
+			80386/80387).  Trig functions still call library
+			routines.
+
+    8.43      30.49	Sun 3/160 16 Mhz, Sun C.  Compiled with -f68881,
+			generating in-line MC68881 instructions.  Trig
+			functions still call library routines.
+
+    6.29      25.17	Sun 3/260 25 Mhz, Sun C.  Compiled with -f68881,
+			generating in-line MC68881 instructions.  Trig
+			functions still call library routines.
+
+    4.57		Sun 3/260 25 Mhz, Sun FORTRAN 77.  Compiled with
+			-O -f68881, generating in-line MC68881 instructions.
+			Trig functions are compiled in-line.  This used
+			the FORTRAN 77 version of the program, FBFORT77.F.
+
+    4.00      14.20	Sun386i/25 Mhz model 250, Sun C compiler.
+
+    4.00      14.00	Sun386i/25 Mhz model 250, Metaware C.
+
+    3.10      12.00	Compaq 386/387 25 Mhz running SCO Xenix 2.
+			Compiled with Metaware HighC 386, optimized
+			for 386.
+
+    3.00      12.00	Compaq 386/387 25MHZ optimized for 386/387.
+
+    2.96       5.17	Sun 4/260, Sparc RISC processor.  Sun C,
+			compiled with the -O2 switch for global
+			optimisation.
+
+    2.47		COMPAQ 486/25, secondary cache disabled, High C,
+			486/387, inline f.p., small memory model.
+
+    2.20       3.40	Data General Motorola 88000, 16 Mhz, Gnu C.
+
+    1.56		COMPAQ 486/25, 128K secondary cache, High C, 486/387,
+			inline f.p., small memory model.
+
+    0.66       1.50	DEC Pmax, Mips processor.
+
+    0.63       0.91	Sun SparcStation 2, Sun C (SunOS 4.1.1) with
+                        -O4 optimisation and "/usr/lib/libm.il" inline
+			floating point.
+
+    0.60       1.07	Intel 860 RISC processor, 33 Mhz, Greenhills
+			C compiler.
+
+    0.40       0.90	Dec 3MAX, MIPS 3000 processor, -O4.
+
+    0.31       0.90	IBM RS/6000, -O.
+
+    0.1129     0.2119	Dell Dimension XPS P133c, Pentium 133 MHz,
+			Windows 95, Microsoft Visual C 5.0.
+
+    0.0883     0.2166	Silicon Graphics Indigo², MIPS R4400,
+                        175 Mhz, "-O3".
+
+    0.0351     0.0561	Dell Dimension XPS R100, Pentium II 400 MHz,
+			Windows 98, Microsoft Visual C 5.0.
+
+    0.0312     0.0542	Sun Ultra 2, UltraSPARC V9, 300 MHz, Solaris
+			2.5.1.
+			
+    0.00862    0.01074  Dell Inspiron 9100, Pentium 4, 3.4 GHz, gcc -O3.
+
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#ifndef INTRIG
+#include <math.h>
+#endif
+
+#define cot(x) (1.0 / tan(x))
+
+#define TRUE  1
+#define FALSE 0
+
+#define max_surfaces 10
+
+/*  Local variables  */
+
+static char tbfr[132];
+
+static short current_surfaces;
+static short paraxial;
+
+static double clear_aperture;
+
+static double aberr_lspher;
+static double aberr_osc;
+static double aberr_lchrom;
+
+static double max_lspher;
+static double max_osc;
+static double max_lchrom;
+
+static double radius_of_curvature;
+static double object_distance;
+static double ray_height;
+static double axis_slope_angle;
+static double from_index;
+static double to_index;
+
+static double spectral_line[9];
+static double s[max_surfaces][5];
+static double od_sa[2][2];
+
+static char outarr[8][80];	   /* Computed output of program goes here */
+
+int itercount;			   /* The iteration counter for the main loop
+				      in the program is made global so that
+				      the compiler should not be allowed to
+				      optimise out the loop over the ray
+				      tracing code. */
+
+#ifndef ITERATIONS
+#define ITERATIONS 1000
+#endif
+int niter = ITERATIONS; 	   /* Iteration counter */
+
+static char *refarr[] = {	   /* Reference results.  These happen to
+				      be derived from a run on Microsoft 
+				      Quick BASIC on the IBM PC/AT. */
+
+        "   Marginal ray          47.09479120920   0.04178472683",
+        "   Paraxial ray          47.08372160249   0.04177864821",
+        "Longitudinal spherical aberration:        -0.01106960671",
+        "    (Maximum permissible):                 0.05306749907",
+        "Offense against sine condition (coma):     0.00008954761",
+        "    (Maximum permissible):                 0.00250000000",
+        "Axial chromatic aberration:                0.00448229032",
+        "    (Maximum permissible):                 0.05306749907"
+};
+
+/* The	test  case  used  in  this program is the  design for a 4 inch
+   achromatic telescope  objective  used  as  the  example  in  Wyld's
+   classic  work  on  ray  tracing by hand, given in Amateur Telescope
+   Making, Volume 3.  */
+
+static double testcase[4][4] = {
+	{27.05, 1.5137, 63.6, 0.52},
+	{-16.68, 1, 0, 0.138},
+	{-16.68, 1.6164, 36.7, 0.38},
+	{-78.1, 1, 0, 0}
+};
+
+/*  Internal trig functions (used only if INTRIG is  defined).	 These
+    standard  functions  may be enabled to obtain timings that reflect
+    the machine's floating point performance rather than the speed  of
+    its trig function evaluation.  */
+
+#ifdef INTRIG
+
+/*  The following definitions should keep you from getting intro trouble
+    with compilers which don't let you redefine intrinsic functions.  */
+
+#define sin I_sin
+#define cos I_cos
+#define tan I_tan
+#define sqrt I_sqrt
+#define atan I_atan
+#define atan2 I_atan2
+#define asin I_asin
+
+#define fabs(x)  ((x < 0.0) ? -x : x)
+
+#define pic 3.1415926535897932
+
+/*  Commonly used constants  */
+
+static double pi = pic,
+	twopi =pic * 2.0,
+	piover4 = pic / 4.0,
+	fouroverpi = 4.0 / pic,
+	piover2 = pic / 2.0;
+
+/*  Coefficients for ATAN evaluation  */
+
+static double atanc[] = {
+	0.0,
+	0.4636476090008061165,
+	0.7853981633974483094,
+	0.98279372324732906714,
+	1.1071487177940905022,
+	1.1902899496825317322,
+	1.2490457723982544262,
+	1.2924966677897852673,
+	1.3258176636680324644
+};
+
+/*  aint(x)	  Return integer part of number.  Truncates towards 0	 */
+
+double aint(x)
+double x;
+{
+	long l;
+
+	/*  Note that this routine cannot handle the full floating point
+	    number range.  This function should be in the machine-dependent
+	    floating point library!  */
+
+	l = x;
+	if ((int)(-0.5) != 0  &&  l < 0 )
+	   l++;
+	x = l;
+	return x;
+}
+
+/*  sin(x)	  Return sine, x in radians  */
+
+static double sin(x)
+double x;
+{
+	int sign;
+	double y, r, z;
+
+	x = (((sign= (x < 0.0)) != 0) ? -x: x);
+
+	if (x > twopi)
+	   x -= (aint(x / twopi) * twopi);
+
+	if (x > pi) {
+	   x -= pi;
+	   sign = !sign;
+	}
+
+	if (x > piover2)
+	   x = pi - x;
+
+	if (x < piover4) {
+	   y = x * fouroverpi;
+	   z = y * y;
+	   r = y * (((((((-0.202253129293E-13 * z + 0.69481520350522E-11) * z -
+	      0.17572474176170806E-8) * z + 0.313361688917325348E-6) * z -
+	      0.365762041821464001E-4) * z + 0.249039457019271628E-2) * z -
+	      0.0807455121882807815) * z + 0.785398163397448310);
+	} else {
+	   y = (piover2 - x) * fouroverpi;
+	   z = y * y;
+	   r = ((((((-0.38577620372E-12 * z + 0.11500497024263E-9) * z -
+	      0.2461136382637005E-7) * z + 0.359086044588581953E-5) * z -
+	      0.325991886926687550E-3) * z + 0.0158543442438154109) * z -
+	      0.308425137534042452) * z + 1.0;
+	}
+	return sign ? -r : r;
+}
+
+/*  cos(x)	  Return cosine, x in radians, by identity  */
+
+static double cos(x)
+double x;
+{
+	x = (x < 0.0) ? -x : x;
+	if (x > twopi)		      /* Do range reduction here to limit */
+	   x = x - (aint(x / twopi) * twopi); /* roundoff on add of PI/2    */
+	return sin(x + piover2);
+}
+
+/*  tan(x)	  Return tangent, x in radians, by identity  */
+
+static double tan(x)
+double x;
+{
+	return sin(x) / cos(x);
+}
+
+/*  sqrt(x)	  Return square root.  Initial guess, then Newton-
+		  Raphson refinement  */
+
+double sqrt(x)
+double x;
+{
+	double c, cl, y;
+	int n;
+
+	if (x == 0.0)
+	   return 0.0;
+
+	if (x < 0.0) {
+	   fprintf(stderr,
+              "\nGood work!  You tried to take the square root of %g",
+	     x);
+	   fprintf(stderr,
+              "\nunfortunately, that is too complex for me to handle.\n");
+	   exit(1);
+	}
+
+	y = (0.154116 + 1.893872 * x) / (1.0 + 1.047988 * x);
+
+	c = (y - x / y) / 2.0;
+	cl = 0.0;
+	for (n = 50; c != cl && n--;) {
+	   y = y - c;
+	   cl = c;
+	   c = (y - x / y) / 2.0;
+	}
+	return y;
+}
+
+/*  atan(x)	  Return arctangent in radians,
+		  range -pi/2 to pi/2  */
+
+static double atan(x)
+double x;
+{
+	int sign, l, y;
+	double a, b, z;
+
+	x = (((sign = (x < 0.0)) != 0) ? -x : x);
+	l = 0;
+
+	if (x >= 4.0) {
+	   l = -1;
+	   x = 1.0 / x;
+	   y = 0;
+	   goto atl;
+	} else {
+	   if (x < 0.25) {
+	      y = 0;
+	      goto atl;
+	   }
+	}
+
+	y = aint(x / 0.5);
+	z = y * 0.5;
+	x = (x - z) / (x * z + 1);
+
+atl:
+	z = x * x;
+	b = ((((893025.0 * z + 49116375.0) * z + 425675250.0) * z +
+	    1277025750.0) * z + 1550674125.0) * z + 654729075.0;
+	a = (((13852575.0 * z + 216602100.0) * z + 891080190.0) * z +
+	    1332431100.0) * z + 654729075.0;
+	a = (a / b) * x + atanc[y];
+	if (l)
+	   a=piover2 - a;
+	return sign ? -a : a;
+}
+
+/*  atan2(y,x)	  Return arctangent in radians of y/x,
+		  range -pi to pi  */
+
+static double atan2(y, x)
+double y, x;
+{
+	double temp;
+
+	if (x == 0.0) {
+	   if (y == 0.0)   /*  Special case: atan2(0,0) = 0  */
+	      return 0.0;
+	   else if (y > 0)
+	      return piover2;
+	   else
+	      return -piover2;
+	}
+	temp = atan(y / x);
+	if (x < 0.0) {
+	   if (y >= 0.0)
+	      temp += pic;
+	   else
+	      temp -= pic;
+	}
+	return temp;
+}
+
+/*  asin(x)	  Return arcsine in radians of x  */
+
+static double asin(x)
+double x;
+{
+	if (fabs(x)>1.0) {
+	   fprintf(stderr,
+              "\nInverse trig functions lose much of their gloss when");
+	   fprintf(stderr,
+              "\ntheir arguments are greater than 1, such as the");
+	   fprintf(stderr,
+              "\nvalue %g you passed.\n", x);
+	   exit(1);
+	}
+	return atan2(x, sqrt(1 - x * x));
+}
+#endif
+
+/*	      Calculate passage through surface
+
+	      If  the variable PARAXIAL is true, the trace through the
+	      surface will be done using the paraxial  approximations.
+	      Otherwise,  the normal trigonometric trace will be done.
+
+	      This routine takes the following inputs:
+
+	      RADIUS_OF_CURVATURE	  Radius of curvature of surface
+					  being crossed.  If 0, surface is
+					  plane.
+
+	      OBJECT_DISTANCE		  Distance of object focus from
+					  lens vertex.	If 0, incoming
+					  rays are parallel and
+					  the following must be specified:
+
+	      RAY_HEIGHT		  Height of ray from axis.  Only
+					  relevant if OBJECT.DISTANCE == 0
+
+	      AXIS_SLOPE_ANGLE		  Angle incoming ray makes with axis
+					  at intercept
+
+	      FROM_INDEX		  Refractive index of medium being left
+
+	      TO_INDEX			  Refractive index of medium being
+					  entered.
+
+	      The outputs are the following variables:
+
+	      OBJECT_DISTANCE		  Distance from vertex to object focus
+					  after refraction.
+
+	      AXIS_SLOPE_ANGLE		  Angle incoming ray makes with axis
+					  at intercept after refraction.
+
+*/
+
+static transit_surface() {
+	double iang,		   /* Incidence angle */
+	       rang,		   /* Refraction angle */
+	       iang_sin,	   /* Incidence angle sin */
+	       rang_sin,	   /* Refraction angle sin */
+	       old_axis_slope_angle, sagitta;
+
+	if (paraxial) {
+	   if (radius_of_curvature != 0.0) {
+	      if (object_distance == 0.0) {
+		 axis_slope_angle = 0.0;
+		 iang_sin = ray_height / radius_of_curvature;
+	      } else
+		 iang_sin = ((object_distance -
+		    radius_of_curvature) / radius_of_curvature) *
+		    axis_slope_angle;
+
+	      rang_sin = (from_index / to_index) *
+		 iang_sin;
+	      old_axis_slope_angle = axis_slope_angle;
+	      axis_slope_angle = axis_slope_angle +
+		 iang_sin - rang_sin;
+	      if (object_distance != 0.0)
+		 ray_height = object_distance * old_axis_slope_angle;
+	      object_distance = ray_height / axis_slope_angle;
+	      return;
+	   }
+	   object_distance = object_distance * (to_index / from_index);
+	   axis_slope_angle = axis_slope_angle * (from_index / to_index);
+	   return;
+	}
+
+	if (radius_of_curvature != 0.0) {
+	   if (object_distance == 0.0) {
+	      axis_slope_angle = 0.0;
+	      iang_sin = ray_height / radius_of_curvature;
+	   } else {
+	      iang_sin = ((object_distance -
+		 radius_of_curvature) / radius_of_curvature) *
+		 sin(axis_slope_angle);
+	   }
+	   iang = asin(iang_sin);
+	   rang_sin = (from_index / to_index) *
+	      iang_sin;
+	   old_axis_slope_angle = axis_slope_angle;
+	   axis_slope_angle = axis_slope_angle +
+	      iang - asin(rang_sin);
+	   sagitta = sin((old_axis_slope_angle + iang) / 2.0);
+	   sagitta = 2.0 * radius_of_curvature*sagitta*sagitta;
+	   object_distance = ((radius_of_curvature * sin(
+	      old_axis_slope_angle + iang)) *
+	      cot(axis_slope_angle)) + sagitta;
+	   return;
+	}
+
+	rang = -asin((from_index / to_index) *
+	   sin(axis_slope_angle));
+	object_distance = object_distance * ((to_index *
+	   cos(-rang)) / (from_index *
+	   cos(axis_slope_angle)));
+	axis_slope_angle = -rang;
+}
+
+/*  Perform ray trace in specific spectral line  */
+
+static trace_line(line, ray_h)
+int line;
+double ray_h;
+{
+	int i;
+
+	object_distance = 0.0;
+	ray_height = ray_h;
+	from_index = 1.0;
+
+	for (i = 1; i <= current_surfaces; i++) {
+	   radius_of_curvature = s[i][1];
+	   to_index = s[i][2];
+	   if (to_index > 1.0)
+	      to_index = to_index + ((spectral_line[4] -
+		 spectral_line[line]) /
+		 (spectral_line[3] - spectral_line[6])) * ((s[i][2] - 1.0) /
+		 s[i][3]);
+	   transit_surface();
+	   from_index = to_index;
+	   if (i < current_surfaces)
+	      object_distance = object_distance - s[i][4];
+	}
+}
+
+/*  Initialise when called the first time  */
+
+main(argc, argv)
+int argc;
+char *argv[];
+{
+	int i, j, k, errors;
+	double od_fline, od_cline;
+#ifdef ACCURACY
+	long passes;
+#endif
+
+	spectral_line[1] = 7621.0;	 /* A */
+	spectral_line[2] = 6869.955;	 /* B */
+	spectral_line[3] = 6562.816;	 /* C */
+	spectral_line[4] = 5895.944;	 /* D */
+	spectral_line[5] = 5269.557;	 /* E */
+	spectral_line[6] = 4861.344;	 /* F */
+        spectral_line[7] = 4340.477;     /* G'*/
+	spectral_line[8] = 3968.494;	 /* H */
+
+	/* Process the number of iterations argument, if one is supplied. */
+
+  niter = 1000000;
+
+	/* Load test case into working array */
+
+	clear_aperture = 4.0;
+	current_surfaces = 4;
+	for (i = 0; i < current_surfaces; i++)
+	   for (j = 0; j < 4; j++)
+	      s[i + 1][j + 1] = testcase[i][j];
+
+#ifdef ACCURACY
+        printf("Beginning execution of floating point accuracy test...\n");
+	passes = 0;
+#else
+        printf("Ready to begin John Walker's floating point accuracy\n");
+        printf("and performance benchmark.  %d iterations will be made.\n\n",
+	   niter);
+
+        printf("\nMeasured run time in seconds should be divided by %.f\n", niter / 1000.0);
+        printf("to normalise for reporting results.  For archival results,\n");
+        printf("adjust iteration count so the benchmark runs about five minutes.\n\n");
+#endif
+
+	/* Perform ray trace the specified number of times. */
+
+#ifdef ACCURACY
+	while (TRUE) {
+	   passes++;
+	   if ((passes % 100L) == 0) {
+              printf("Pass %ld.\n", passes);
+	   }
+#else
+	for (itercount = 0; itercount < niter; itercount++) {
+#endif
+
+	   for (paraxial = 0; paraxial <= 1; paraxial++) {
+
+	      /* Do main trace in D light */
+
+	      trace_line(4, clear_aperture / 2.0);
+	      od_sa[paraxial][0] = object_distance;
+	      od_sa[paraxial][1] = axis_slope_angle;
+	   }
+	   paraxial = FALSE;
+
+	   /* Trace marginal ray in C */
+
+	   trace_line(3, clear_aperture / 2.0);
+	   od_cline = object_distance;
+
+	   /* Trace marginal ray in F */
+
+	   trace_line(6, clear_aperture / 2.0);
+	   od_fline = object_distance;
+
+	   aberr_lspher = od_sa[1][0] - od_sa[0][0];
+	   aberr_osc = 1.0 - (od_sa[1][0] * od_sa[1][1]) /
+	      (sin(od_sa[0][1]) * od_sa[0][0]);
+	   aberr_lchrom = od_fline - od_cline;
+	   max_lspher = sin(od_sa[0][1]);
+
+	   /* D light */
+
+	   max_lspher = 0.0000926 / (max_lspher * max_lspher);
+	   max_osc = 0.0025;
+	   max_lchrom = max_lspher;
+#ifndef ACCURACY
+	}
+#endif
+
+	/* Now evaluate the accuracy of the results from the last ray trace */
+
+        sprintf(outarr[0], "%15s   %21.11f  %14.11f",
+           "Marginal ray", od_sa[0][0], od_sa[0][1]);
+        sprintf(outarr[1], "%15s   %21.11f  %14.11f",
+           "Paraxial ray", od_sa[1][0], od_sa[1][1]);
+	sprintf(outarr[2],
+           "Longitudinal spherical aberration:      %16.11f",
+	   aberr_lspher);
+	sprintf(outarr[3],
+           "    (Maximum permissible):              %16.11f",
+	   max_lspher);
+	sprintf(outarr[4],
+           "Offense against sine condition (coma):  %16.11f",
+	   aberr_osc);
+	sprintf(outarr[5],
+           "    (Maximum permissible):              %16.11f",
+	   max_osc);
+	sprintf(outarr[6],
+           "Axial chromatic aberration:             %16.11f",
+	   aberr_lchrom);
+	sprintf(outarr[7],
+           "    (Maximum permissible):              %16.11f",
+	   max_lchrom);
+
+	/* Now compare the edited results with the master values from
+	   reference executions of this program. */
+
+	errors = 0;
+	for (i = 0; i < 8; i++) {
+	   if (strcmp(outarr[i], refarr[i]) != 0) {
+#ifdef ACCURACY
+              printf("\nError in pass %ld for results on line %d...\n",
+		     passes, i + 1);
+#else
+              printf("\nError in results on line %d...\n", i + 1);
+#endif
+              printf("Expected:  \"%s\"\n", refarr[i]);
+              printf("Received:  \"%s\"\n", outarr[i]);
+              printf("(Errors)    ");
+	      k = strlen(refarr[i]);
+	      for (j = 0; j < k; j++) {
+                 printf("%c", refarr[i][j] == outarr[i][j] ? ' ' : '^');
+		 if (refarr[i][j] != outarr[i][j])
+		    errors++;
+	      }
+              printf("\n");
+	   }
+	}
+#ifdef ACCURACY
+	}
+#else
+	if (errors > 0) {
+           printf("\n%d error%s in results.  This is VERY SERIOUS.\n",
+              errors, errors > 1 ? "s" : "");
+	} else
+           printf("\nNo errors in results.\n");
+#endif
+}

Added: test-suite/trunk/SingleSource/Benchmarks/Misc/ffbench.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/SingleSource/Benchmarks/Misc/ffbench.c?rev=49159&view=auto

==============================================================================
--- test-suite/trunk/SingleSource/Benchmarks/Misc/ffbench.c (added)
+++ test-suite/trunk/SingleSource/Benchmarks/Misc/ffbench.c Thu Apr  3 03:57:51 2008
@@ -0,0 +1,361 @@
+/*
+
+	Two-dimensional FFT benchmark
+
+	Designed and implemented by John Walker in April of 1989.
+
+	This  benchmark executes a specified number of passes (default
+	20) through a loop in which each  iteration  performs  a  fast
+	Fourier transform of a square matrix (default size 256x256) of
+	complex numbers (default precision double),  followed  by  the
+	inverse  transform.   After  all loop iterations are performed
+	the results are checked against known correct values.
+
+	This benchmark is intended for use on C implementations  which
+        define  "int"  as  32 bits or longer and permit allocation and
+	direct addressing of arrays larger than one megabyte.
+
+	If CAPOUT is defined,  the  result  after  all	iterations  is
+	written  as  a	CA  Lab  pattern  file.   This is intended for
+	debugging in case horribly wrong results  are  obtained  on  a
+	given machine.
+
+	Archival  timings  are	run  with the definitions below set as
+	follows: Float = double, Asize = 256, Passes = 20, CAPOUT  not
+	defined.
+
+	Time (seconds)		    System
+
+            2393.93       Sun 3/260, SunOS 3.4, C, "-f68881 -O".
+			  (John Walker).
+
+            1928          Macintosh IIx, MPW C 3.0, "-mc68020
+                          -mc68881 -elems881 -m".  (Hugh Hoover).
+
+            1636.1        Sun 4/110, "cc -O3 -lm".  (Michael McClary).
+			  The suspicion is that this is software
+			  floating point.
+
+            1556.7        Macintosh II, A/UX, "cc -O -lm"
+			  (Michael McClary).
+
+	    1388.8	  Sun 386i/250, SunOS 4.0.1 C
+                          "-O /usr/lib/trig.il".  (James Carrington).
+
+	    1331.93	  Sun 3/60, SunOS 4.0.1, C,
+                          "-O4 -f68881 /usr/lib/libm.il"
+			  (Bob Elman).
+
+            1204.0        Apollo Domain DN4000, C, "-cpu 3000 -opt 4".
+			  (Sam Crupi).
+
+	    1174.66	  Compaq 386/25, SCO Xenix 386 C.
+			  (Peter Shieh).
+
+	    1068	  Compaq 386/25, SCO Xenix 386,
+			  Metaware High C.  (Robert Wenig).
+
+	    1064.0	  Sun 3/80, SunOS 4.0.3 Beta C
+                          "-O3 -f68881 /usr/lib/libm.il".  (James Carrington).
+
+	    1061.4	  Compaq 386/25, SCO Xenix, High C 1.4.
+			  (James Carrington).
+
+	    1059.79	  Compaq 386/25, 387/25, High C 1.4,
+			  DOS|Extender 2.2, 387 inline code
+			  generation.  (Nathan Bender).
+
+	     777.14	  Compaq 386/25, IIT 3C87-25 (387 Compatible),
+			  High C 1.5, DOS|Extender 2.2, 387 inline
+			  code generation.  (Nathan Bender).
+
+	     751	  Compaq DeskPro 386/33, High C 1.5 + DOS|Extender,
+			  387 code generation.	(James Carrington).
+
+	     431.44	  Compaq 386/25, Weitek 3167-25, DOS 3.31,
+			  High C 1.4, DOS|Extender, Weitek code generation.
+			  (Nathan Bender).
+
+	     344.9	  Compaq 486/25, Metaware High C 1.6, Phar Lap
+			  DOS|Extender, in-line floating point.  (Nathan
+			  Bender).
+
+	     324.2	  Data General Motorola 88000, 16 Mhz, Gnu C.
+
+             323.1        Sun 4/280, C, "-O4".  (Eric Hill).
+
+	     254	  Compaq SystemPro 486/33, High C 1.5 + DOS|Extender,
+			  387 code generation.	(James Carrington).
+
+	     242.8	  Silicon Graphics Personal IRIS, MIPS R2000A,
+                          12.5 Mhz, "-O3" (highest level optimisation).
+			  (Mike Zentner).
+
+             233.0        Sun SPARCStation 1, C, "-O4", SunOS 4.0.3.
+			  (Nathan Bender).
+
+	     187.30	  DEC PMAX 3100, MIPS 2000 chip.
+			  (Robert Wenig).
+
+             120.46       Sun SparcStation 2, C, "-O4", SunOS 4.1.1.
+			  (John Walker).
+
+             120.21       DEC 3MAX, MIPS 3000, "-O4".
+
+	      98.0	  Intel i860 experimental environment,
+			  OS/2, data caching disabled.	(Kern
+			  Sibbald).
+
+	      34.9	  Silicon Graphics Indigo², MIPS R4400,
+                          175 Mhz, IRIX 5.2, "-O".
+
+	      32.4	  Pentium 133, Windows NT, Microsoft Visual
+			  C++ 4.0.
+
+	      17.25	  Silicon Graphics Indigo², MIPS R4400,
+                          175 Mhz, IRIX 6.5, "-O3".
+
+	      14.10	  Dell Dimension XPS R100, Pentium II 400 MHz,
+			  Windows 98, Microsoft Visual C 5.0.
+
+	      10.7	  Hewlett-Packard Kayak XU 450Mhz Pentium II,
+			  Microsoft Visual C++ 6.0, Windows NT 4.0sp3.	(Nathan Bender).
+
+	       5.09	  Sun Ultra 2, UltraSPARC V9, 300 MHz, gcc -O3.
+	       
+	       0.846	  Dell Inspiron 9100, Pentium 4, 3.4 GHz, gcc -O3.
+
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+/*  The  program  may  be  run	with  Float defined as either float or
+    double.  With IEEE arithmetic, the same answers are generated  for
+    either floating point mode.  */
+
+#define Float	 double 	   /* Floating point type used in FFT */
+
+#define Asize	 256		   /* Array edge size */
+#define Passes	 63		   /* Number of FFT/Inverse passes */
+
+#define max(a,b) ((a)>(b)?(a):(b))
+#define min(a,b) ((a)<=(b)?(a):(b))
+
+/*
+
+	Multi-dimensional fast Fourier transform
+
+        Adapted from Press et al., "Numerical Recipes in C".
+
+*/
+
+#define SWAP(a,b) tempr=(a); (a)=(b); (b)=tempr
+
+static void fourn(data, nn, ndim, isign)
+  Float data[];
+  int nn[], ndim, isign;
+{
+	register int i1, i2, i3;
+	int i2rev, i3rev, ip1, ip2, ip3, ifp1, ifp2;
+	int ibit, idim, k1, k2, n, nprev, nrem, ntot;
+	Float tempi, tempr;
+	double theta, wi, wpi, wpr, wr, wtemp;
+
+	ntot = 1;
+	for (idim = 1; idim <= ndim; idim++)
+	   ntot *= nn[idim];
+	nprev = 1;
+	for (idim = ndim; idim >= 1; idim--) {
+	   n = nn[idim];
+	   nrem = ntot / (n * nprev);
+	   ip1 = nprev << 1;
+	   ip2 = ip1 * n;
+	   ip3 = ip2 * nrem;
+	   i2rev = 1;
+	   for (i2 = 1; i2 <= ip2; i2 += ip1) {
+	      if (i2 < i2rev) {
+		 for (i1 = i2; i1 <= i2 + ip1 - 2; i1 += 2) {
+		    for (i3 = i1; i3 <= ip3; i3 += ip2) {
+		       i3rev = i2rev + i3 - i2;
+		       SWAP(data[i3], data[i3rev]);
+		       SWAP(data[i3 + 1], data[i3rev + 1]);
+		    }
+		 }
+	      }
+	      ibit = ip2 >> 1;
+	      while (ibit >= ip1 && i2rev > ibit) {
+		 i2rev -= ibit;
+		 ibit >>= 1;
+	      }
+	      i2rev += ibit;
+	   }
+	   ifp1 = ip1;
+	   while (ifp1 < ip2) {
+	      ifp2 = ifp1 << 1;
+	      theta = isign * 6.28318530717959 / (ifp2 / ip1);
+	      wtemp = sin(0.5 * theta);
+	      wpr = -2.0 * wtemp * wtemp;
+	      wpi = sin(theta);
+	      wr = 1.0;
+	      wi = 0.0;
+	      for (i3 = 1; i3 <= ifp1; i3 += ip1) {
+		 for (i1 = i3; i1 <= i3 + ip1 - 2; i1 += 2) {
+		    for (i2 = i1; i2 <= ip3; i2 += ifp2) {
+		       k1 = i2;
+		       k2 = k1 + ifp1;
+		       tempr = wr * data[k2] - wi * data[k2 + 1];
+		       tempi = wr * data[k2 + 1] + wi * data[k2];
+		       data[k2] = data[k1] - tempr;
+		       data[k2 + 1] = data[k1 + 1] - tempi;
+		       data[k1] += tempr;
+		       data[k1 + 1] += tempi;
+		    }
+		 }
+		 wr = (wtemp = wr) * wpr - wi * wpi + wr;
+		 wi = wi * wpr + wtemp * wpi + wi;
+	      }
+	      ifp1 = ifp2;
+	   }
+	   nprev *= n;
+	}
+}
+#undef SWAP
+
+int main()
+{
+	int i, j, k, l, m, npasses = Passes, faedge;
+	Float *fdata;
+	static int nsize[] = {0, 0, 0};
+	long fanum, fasize;
+	double mapbase, mapscale, rmin, rmax, imin, imax;
+
+	faedge = Asize; 	   /* FFT array edge size */
+	fanum = faedge * faedge;   /* Elements in FFT array */
+	fasize = ((fanum + 1) * 2 * sizeof(Float)); /* FFT array size */
+	nsize[1] = nsize[2] = faedge;
+
+	fdata = (Float *) malloc(fasize);
+	if (fdata == NULL) {
+           fprintf(stderr, "Can't allocate data array.\n");
+	   exit(1);
+	}
+
+	/*  Generate data array to process.  */
+
+#define Re(x,y) fdata[1 + (faedge * (x) + (y)) * 2]
+#define Im(x,y) fdata[2 + (faedge * (x) + (y)) * 2]
+
+	memset(fdata, 0, fasize);
+	for (i = 0; i < faedge; i++) {
+	   for (j = 0; j < faedge; j++) {
+	      if (((i & 15) == 8) || ((j & 15) == 8))
+		 Re(i, j) = 128.0;
+	   }
+	}
+
+	for (i = 0; i < npasses; i++) {
+/*printf("Pass %d\n", i);*/
+	   /* Transform image to frequency domain. */
+	   fourn(fdata, nsize, 2, 1);
+
+	   /*  Back-transform to image. */
+	   fourn(fdata, nsize, 2, -1);
+	}
+
+	{
+	   double r, ij, ar, ai;
+	   rmin = 1e10; rmax = -1e10;
+	   imin = 1e10; imax = -1e10;
+	   ar = 0;
+	   ai = 0;
+
+	   for (i = 1; i <= fanum; i += 2) {
+	      r = fdata[i];
+	      ij = fdata[i + 1];
+	      ar += r;
+	      ai += ij;
+	      rmin = min(r, rmin);
+	      rmax = max(r, rmax);
+	      imin = min(ij, imin);
+	      imax = max(ij, imax);
+	   }
+#ifdef DEBUG
+           printf("Real min %.4g, max %.4g.  Imaginary min %.4g, max %.4g.\n",
+	      rmin, rmax, imin, imax);
+           printf("Average real %.4g, imaginary %.4g.\n", 
+	      ar / fanum, ai / fanum);
+#endif
+	   mapbase = rmin;
+	   mapscale = 255 / (rmax - rmin);
+	}
+
+	/* See if we got the right answers. */
+
+	m = 0;
+	for (i = 0; i < faedge; i++) {
+	   for (j = 0; j < faedge; j++) {
+	      k = (Re(i, j) - mapbase) * mapscale;
+	      l = (((i & 15) == 8) || ((j & 15) == 8)) ? 255 : 0;
+	      if (k != l) {
+		 m++;
+		 fprintf(stderr,
+                    "Wrong answer at (%d,%d)!  Expected %d, got %d.\n",
+		    i, j, l, k);
+	      }
+	   }
+	}
+	if (m == 0) {
+           fprintf(stderr, "%d passes.  No errors in results.\n", npasses);
+	} else {
+           fprintf(stderr, "%d passes.  %d errors in results.\n",
+	      npasses, m);
+	}
+
+#ifdef CAPOUT
+
+	/* Output the result of the transform as a CA Lab pattern
+	   file for debugging. */
+
+	{
+#define SCRX  322
+#define SCRY  200
+#define SCRN  (SCRX * SCRY)
+	   unsigned char patarr[SCRY][SCRX];
+	   FILE *fp;
+
+/*  Map user external state numbers to internal state index  */
+
+#define UtoI(x)     (((((x) >> 1) & 0x7F) | ((x) << 7)) & 0xFF)
+
+	   /* Copy data from FFT buffer to map. */
+
+	   memset(patarr, 0, sizeof patarr);
+	   l = (SCRX - faedge) / 2;
+	   m = (faedge > SCRY) ? 0 : ((SCRY - faedge) / 2);
+	   for (i = 1; i < faedge; i++) {
+	      for (j = 0; j < min(SCRY, faedge); j++) {
+		 k = (Re(i, j) - mapbase) * mapscale;
+		 patarr[j + m][i + l] = UtoI(k);
+	      }
+	   }
+
+	   /* Dump pattern map to file. */
+
+           fp = fopen("fft.cap", "w");
+	   if (fp == NULL) {
+              fprintf(stderr, "Cannot open output file.\n");
+	      exit(0);
+	   }
+           putc(':', fp);
+	   putc(1, fp);
+	   fwrite(patarr, SCRN, 1, fp);
+	   putc(6, fp);
+	   fclose(fp);
+	}
+#endif
+
+	return 0;
+}