[llvm-commits] CVS: llvm-test/MultiSource/Benchmarks/SciMark2-C/FFT.c FFT.h LU.c LU.h Makefile MonteCarlo.c MonteCarlo.h README Random.c Random.h SOR.c SOR.h SparseCompRow.c SparseCompRow.h Stopwatch.c Stopwatch.h array.c array.h constants.h kernel.c kernel.h scimark2.c scimark2.h

Brian Gaeke gaeke at persephone.cs.uiuc.edu
Wed Oct 20 11:13:27 PDT 2004



Changes in directory llvm-test/MultiSource/Benchmarks/SciMark2-C:

FFT.c added (r1.1)
FFT.h added (r1.1)
LU.c added (r1.1)
LU.h added (r1.1)
Makefile added (r1.1)
MonteCarlo.c added (r1.1)
MonteCarlo.h added (r1.1)
README added (r1.1)
Random.c added (r1.1)
Random.h added (r1.1)
SOR.c added (r1.1)
SOR.h added (r1.1)
SparseCompRow.c added (r1.1)
SparseCompRow.h added (r1.1)
Stopwatch.c added (r1.1)
Stopwatch.h added (r1.1)
array.c added (r1.1)
array.h added (r1.1)
constants.h added (r1.1)
kernel.c added (r1.1)
kernel.h added (r1.1)
scimark2.c added (r1.1)
scimark2.h added (r1.1)
---
Log message:

Adding benchmark.


---
Diffs of the changes:  (+1296 -0)

Index: llvm-test/MultiSource/Benchmarks/SciMark2-C/FFT.c
diff -c /dev/null llvm-test/MultiSource/Benchmarks/SciMark2-C/FFT.c:1.1
*** /dev/null	Wed Oct 20 13:13:21 2004
--- llvm-test/MultiSource/Benchmarks/SciMark2-C/FFT.c	Wed Oct 20 13:13:10 2004
***************
*** 0 ****
--- 1,165 ----
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <math.h>
+ 
+ #include "FFT.h"
+ 
+ #define PI  3.1415926535897932
+ 
+ /*-----------------------------------------------------------------------*/
+ 
+ static int int_log2(int n);
+ 
+ double FFT_num_flops(int N)
+ {
+ 
+      double Nd = (double) N;
+      double logN = (double) int_log2(N);
+ 
+      return (5.0*Nd-2)*logN + 2*(Nd+1);
+ }
+ 
+ static int int_log2 (int n)
+ {
+     int k = 1;
+     int log = 0;
+     for(/*k=1*/; k < n; k *= 2, log++);
+     if (n != (1 << log))
+     {
+       printf("FFT: Data length is not a power of 2!: %d ",n);
+       exit(1);
+     }
+     return log; 
+ }
+ 
+ static void FFT_transform_internal (int N, double *data, int direction) {
+     int n = N/2;
+     int bit = 0;
+     int logn;
+     int dual = 1;
+ 
+     if (n == 1) return;         /* Identity operation! */
+     logn = int_log2(n);
+ 
+ 
+     if (N == 0) return;    
+ 
+     /* bit reverse the input data for decimation in time algorithm */
+     FFT_bitreverse(N, data) ;
+ 
+     /* apply fft recursion */
+     /* this loop executed int_log2(N) times */
+     for (bit = 0; bit < logn; bit++, dual *= 2) {
+       double w_real = 1.0;
+       double w_imag = 0.0;
+       int a;
+       int b;
+ 
+       double theta = 2.0 * direction * PI / (2.0 * (double) dual);
+       double s = sin(theta);
+       double t = sin(theta / 2.0);
+       double s2 = 2.0 * t * t;
+ 
+       for (a=0, b = 0; b < n; b += 2 * dual) {
+         int i = 2*b ;
+         int j = 2*(b + dual);
+ 
+         double wd_real = data[j] ;
+         double wd_imag = data[j+1] ;
+           
+         data[j]   = data[i]   - wd_real;
+         data[j+1] = data[i+1] - wd_imag;
+         data[i]  += wd_real;
+         data[i+1]+= wd_imag;
+       }
+       
+       /* a = 1 .. (dual-1) */
+       for (a = 1; a < dual; a++) {
+         /* trignometric recurrence for w-> exp(i theta) w */
+         {
+           double tmp_real = w_real - s * w_imag - s2 * w_real;
+           double tmp_imag = w_imag + s * w_real - s2 * w_imag;
+           w_real = tmp_real;
+           w_imag = tmp_imag;
+         }
+         for (b = 0; b < n; b += 2 * dual) {
+           int i = 2*(b + a);
+           int j = 2*(b + a + dual);
+ 
+           double z1_real = data[j];
+           double z1_imag = data[j+1];
+               
+           double wd_real = w_real * z1_real - w_imag * z1_imag;
+           double wd_imag = w_real * z1_imag + w_imag * z1_real;
+ 
+           data[j]   = data[i]   - wd_real;
+           data[j+1] = data[i+1] - wd_imag;
+           data[i]  += wd_real;
+           data[i+1]+= wd_imag;
+         }
+       }
+     }
+   }
+ 
+ 
+ void FFT_bitreverse(int N, double *data) {
+     /* This is the Goldrader bit-reversal algorithm */
+     int n=N/2;
+     int nm1 = n-1;
+     int i=0; 
+     int j=0;
+     for (; i < nm1; i++) {
+ 
+       /*int ii = 2*i; */
+       int ii = i << 1;
+ 
+       /*int jj = 2*j; */
+       int jj = j << 1;
+ 
+       /* int k = n / 2 ; */
+       int k = n >> 1;
+ 
+       if (i < j) {
+         double tmp_real    = data[ii];
+         double tmp_imag    = data[ii+1];
+         data[ii]   = data[jj];
+         data[ii+1] = data[jj+1];
+         data[jj]   = tmp_real;
+         data[jj+1] = tmp_imag; }
+ 
+       while (k <= j) 
+       {
+         /*j = j - k ; */
+         j -= k;
+ 
+         /*k = k / 2 ;  */
+         k >>= 1 ; 
+       }
+       j += k ;
+     }
+   }
+ 
+ 
+ void FFT_transform(int N, double *data)
+ {
+     FFT_transform_internal(N, data, -1);
+ }
+ 
+ 
+ void FFT_inverse(int N, double *data)
+ {
+     int n = N/2;
+     double norm = 0.0;
+     int i=0;
+     FFT_transform_internal(N, data, +1);
+ 
+     /* Normalize */
+ 
+ 
+     norm=1/((double) n);
+     for(i=0; i<N; i++)
+       data[i] *= norm;
+   
+ }
+ 
+ 


Index: llvm-test/MultiSource/Benchmarks/SciMark2-C/FFT.h
diff -c /dev/null llvm-test/MultiSource/Benchmarks/SciMark2-C/FFT.h:1.1
*** /dev/null	Wed Oct 20 13:13:26 2004
--- llvm-test/MultiSource/Benchmarks/SciMark2-C/FFT.h	Wed Oct 20 13:13:10 2004
***************
*** 0 ****
--- 1,5 ----
+ 
+ void FFT_transform(int N, double *data);
+ void FFT_inverse(int N, double *data);
+ void FFT_bitreverse(int N, double *data);
+ double FFT_num_flops(int N);


Index: llvm-test/MultiSource/Benchmarks/SciMark2-C/LU.c
diff -c /dev/null llvm-test/MultiSource/Benchmarks/SciMark2-C/LU.c:1.1
*** /dev/null	Wed Oct 20 13:13:26 2004
--- llvm-test/MultiSource/Benchmarks/SciMark2-C/LU.c	Wed Oct 20 13:13:10 2004
***************
*** 0 ****
--- 1,102 ----
+ #include <math.h>
+ #include "LU.h"
+ 
+ double LU_num_flops(int N)
+ {
+         /* rougly 2/3*N^3 */
+ 
+     double Nd = (double) N;
+ 
+     return (2.0 * Nd *Nd *Nd/ 3.0);
+ }
+ 
+ 
+ void LU_copy_matrix(int M, int N, double **lu, double **A)
+ {
+     int i;
+     int j;
+ 
+     for (i=0; i<M; i++)
+         for (j=0; j<N; j++)
+             lu[i][j] = A[i][j];
+ }
+ 
+ 
+ int LU_factor(int M, int N, double **A,  int *pivot)
+ {
+  
+ 
+     int minMN =  M < N ? M : N;
+     int j=0;
+ 
+     for (j=0; j<minMN; j++)
+     {
+         /* find pivot in column j and  test for singularity. */
+ 
+         int jp=j;
+         int i;
+         
+         double t = fabs(A[j][j]);
+         for (i=j+1; i<M; i++)
+         {
+             double ab = fabs(A[i][j]);
+             if ( ab > t)
+             {
+                 jp = i;
+                 t = ab;
+             }
+         }
+         
+         pivot[j] = jp;
+ 
+         /* jp now has the index of maximum element  */
+         /* of column j, below the diagonal          */
+ 
+         if ( A[jp][j] == 0 )                 
+             return 1;       /* factorization failed because of zero pivot */
+ 
+ 
+         if (jp != j)
+         {
+             /* swap rows j and jp */
+             double *tA = A[j];
+             A[j] = A[jp];
+             A[jp] = tA;
+         }
+ 
+         if (j<M-1)                /* compute elements j+1:M of jth column  */
+         {
+             /* note A(j,j), was A(jp,p) previously which was */
+             /* guarranteed not to be zero (Label #1)         */
+ 
+             double recp =  1.0 / A[j][j];
+             int k;
+             for (k=j+1; k<M; k++)
+                 A[k][j] *= recp;
+         }
+ 
+ 
+         if (j < minMN-1)
+         {
+             /* rank-1 update to trailing submatrix:   E = E - x*y; */
+             /* E is the region A(j+1:M, j+1:N) */
+             /* x is the column vector A(j+1:M,j) */
+             /* y is row vector A(j,j+1:N)        */
+ 
+             int ii;
+             for (ii=j+1; ii<M; ii++)
+             {
+                 double *Aii = A[ii];
+                 double *Aj = A[j];
+                 double AiiJ = Aii[j];
+                 int jj;
+                 for (jj=j+1; jj<N; jj++)
+                   Aii[jj] -= AiiJ * Aj[jj];
+ 
+             }
+         }
+     }
+ 
+     return 0;
+ }
+ 


Index: llvm-test/MultiSource/Benchmarks/SciMark2-C/LU.h
diff -c /dev/null llvm-test/MultiSource/Benchmarks/SciMark2-C/LU.h:1.1
*** /dev/null	Wed Oct 20 13:13:26 2004
--- llvm-test/MultiSource/Benchmarks/SciMark2-C/LU.h	Wed Oct 20 13:13:10 2004
***************
*** 0 ****
--- 1,9 ----
+ #ifndef LU_H
+ #define LU_H
+ 
+ double LU_num_flops(int N);
+ void LU_copy_matrix(int M, int N, double **lu, double **A);
+ int LU_factor(int M, int N, double **A, int *pivot);
+ 
+ 
+ #endif


Index: llvm-test/MultiSource/Benchmarks/SciMark2-C/Makefile
diff -c /dev/null llvm-test/MultiSource/Benchmarks/SciMark2-C/Makefile:1.1
*** /dev/null	Wed Oct 20 13:13:26 2004
--- llvm-test/MultiSource/Benchmarks/SciMark2-C/Makefile	Wed Oct 20 13:13:10 2004
***************
*** 0 ****
--- 1,13 ----
+ LEVEL = ../../..
+ PROG       = scimark2
+ CPPFLAGS   = 
+ LDFLAGS    = -lm
+ 
+ ifdef LARGE_PROBLEM_SIZE
+ RUN_OPTIONS = -large
+ endif
+ 
+ # This program is a timing related test
+ PROGRAM_IS_NONDETERMINISTIC = 1
+ 
+ include	../../Makefile.multisrc


Index: llvm-test/MultiSource/Benchmarks/SciMark2-C/MonteCarlo.c
diff -c /dev/null llvm-test/MultiSource/Benchmarks/SciMark2-C/MonteCarlo.c:1.1
*** /dev/null	Wed Oct 20 13:13:26 2004
--- llvm-test/MultiSource/Benchmarks/SciMark2-C/MonteCarlo.c	Wed Oct 20 13:13:10 2004
***************
*** 0 ****
--- 1,70 ----
+ #include "Random.h"
+ 
+ /**
+  Estimate Pi by approximating the area of a circle.
+ 
+  How: generate N random numbers in the unit square, (0,0) to (1,1)
+  and see how are within a radius of 1 or less, i.e.
+  <pre>  
+ 
+  sqrt(x^2 + y^2) < r
+ 
+  </pre>
+   since the radius is 1.0, we can square both sides
+   and avoid a sqrt() computation:
+   <pre>
+ 
+     x^2 + y^2 <= 1.0
+ 
+   </pre>
+   this area under the curve is (Pi * r^2)/ 4.0,
+   and the area of the unit of square is 1.0,
+   so Pi can be approximated by 
+   <pre>
+                 # points with x^2+y^2 < 1
+      Pi =~      --------------------------  * 4.0
+                      total # points
+ 
+   </pre>
+ 
+ */
+ 
+ static const int SEED = 113;
+ 
+ 
+     double MonteCarlo_num_flops(int Num_samples)
+     {
+         /* 3 flops in x^2+y^2 and 1 flop in random routine */
+ 
+         return ((double) Num_samples)* 4.0;
+ 
+     }
+ 
+     
+ 
+     double MonteCarlo_integrate(int Num_samples)
+     {
+ 
+ 
+         Random R = new_Random_seed(SEED);
+ 
+ 
+         int under_curve = 0;
+         int count;
+ 
+         for (count=0; count<Num_samples; count++)
+         {
+             double x= Random_nextDouble(R);
+             double y= Random_nextDouble(R);
+ 
+             if ( x*x + y*y <= 1.0)
+                  under_curve ++;
+             
+         }
+ 
+         Random_delete(R);
+ 
+         return ((double) under_curve / Num_samples) * 4.0;
+     }
+ 
+ 


Index: llvm-test/MultiSource/Benchmarks/SciMark2-C/MonteCarlo.h
diff -c /dev/null llvm-test/MultiSource/Benchmarks/SciMark2-C/MonteCarlo.h:1.1
*** /dev/null	Wed Oct 20 13:13:26 2004
--- llvm-test/MultiSource/Benchmarks/SciMark2-C/MonteCarlo.h	Wed Oct 20 13:13:11 2004
***************
*** 0 ****
--- 1,2 ----
+ double MonteCarlo_integrate(int Num_samples);
+ double MonteCarlo_num_flops(int Num_samples);


Index: llvm-test/MultiSource/Benchmarks/SciMark2-C/README
diff -c /dev/null llvm-test/MultiSource/Benchmarks/SciMark2-C/README:1.1
*** /dev/null	Wed Oct 20 13:13:26 2004
--- llvm-test/MultiSource/Benchmarks/SciMark2-C/README	Wed Oct 20 13:13:11 2004
***************
*** 0 ****
--- 1,51 ----
+ 		SciMark2 (C version)
+ 
+ This is an ANSI C version of the SciMark2 benchmark,
+ translated from the original Java sources.  The intent
+ in making this benchmark available in C is for mainly
+ for performance comparisons.   For more information
+ about SciMark, see http://math.nist.gov/scimark.
+ 
+ Results of this benchmark can be sent to pozo at nist.gov.
+ 
+ The program is split up into the main driver (scimark2.c) and
+ kernel routines.  A sample makefile is included; 
+ however, one could simply write 
+ 
+ 
+ > cc -o scimark2  -O *.c
+ 
+ and then run
+ 
+ > scimark2
+ 
+ This produces an output similar to
+ 
+ 
+ **                                                              **
+ ** SciMark2 Numeric Benchmark, see http://math.nist.gov/scimark **
+ ** for details. (Results can be submitted to pozo at nist.gov)     **
+ **                                                              **
+ Using       2.00 seconds min time per kenel.
+ Composite Score:           65.56
+ FFT             Mflops:    63.38    (N=1024)
+ SOR             Mflops:   124.80    (100 x 100)
+ MonteCarlo:     Mflops:    16.05
+ Sparse matmult  Mflops:    59.15    (N=1000, nz=5000)
+ LU              Mflops:    64.40    (M=100, N=100)
+ 0:29.62 Elapsed, 29.620 user sec, 0.010 sys sec, 100.0% utilization.
+ 
+ 
+ 
+ The first SciMark number reported is the composite score, followed
+ by the an approximate Mflop rate for each kernel.
+ 
+ 
+ To run the "large" version of this benchmark (with data structures
+ that typically do not fit in cache) use
+ 
+ >scimark2 -large
+ 
+ 
+ ------------------------------------------------------------------
+ 


Index: llvm-test/MultiSource/Benchmarks/SciMark2-C/Random.c
diff -c /dev/null llvm-test/MultiSource/Benchmarks/SciMark2-C/Random.c:1.1
*** /dev/null	Wed Oct 20 13:13:26 2004
--- llvm-test/MultiSource/Benchmarks/SciMark2-C/Random.c	Wed Oct 20 13:13:11 2004
***************
*** 0 ****
--- 1,173 ----
+ 
+ 
+ /* #include <malloc.h>  (brg) */
+ 
+ #include "Random.h"
+ 
+ #ifndef NULL
+ #define NULL 0
+ #endif
+ 
+ 
+   /* static const int mdig = 32; */
+ #define MDIG 32
+ 
+   /* static const int one = 1; */
+ #define ONE 1
+ 
+   static const int m1 = (ONE << (MDIG-2)) + ((ONE << (MDIG-2) )-ONE);
+   static const int m2 = ONE << MDIG/2;
+ 
+   /* For mdig = 32 : m1 =          2147483647, m2 =      65536
+      For mdig = 64 : m1 = 9223372036854775807, m2 = 4294967296 
+   */
+ 
+                                 /* move to initialize() because  */
+                                 /* compiler could not resolve as */
+                                 /*   a constant.                 */
+ 
+   static /*const*/ double dm1;  /*  = 1.0 / (double) m1; */
+ 
+ 
+ /* private methods (defined below, but not in Random.h */
+ 
+ static void initialize(Random R, int seed);
+ 
+ Random new_Random_seed(int seed)
+ {
+     Random R = (Random) malloc(sizeof(Random_struct));
+ 
+     initialize(R, seed);
+     R->left = 0.0;
+     R->right = 1.0;
+     R->width = 1.0;
+     R->haveRange = 0 /*false*/;
+ 
+     return R;
+ }
+ 
+ Random new_Random(int seed, double left, double right) 
+ {
+     Random R = (Random) malloc(sizeof(Random_struct));
+ 
+     initialize(R, seed);
+     R->left = left;
+     R->right = right;
+     R->width = right - left;
+     R->haveRange = 1;          /* true */
+ 
+     return R;
+ }
+ 
+ void Random_delete(Random R)
+ {
+     free(R);
+ }
+ 
+ 
+ 
+ /* Returns the next random number in the sequence.  */
+ 
+ double Random_nextDouble(Random R) 
+ {
+     int k;
+ 
+     int I = R->i;
+     int J = R->j;
+     int *m = R->m;
+ 
+     k = m[I] - m[J];
+     if (k < 0) k += m1;
+     R->m[J] = k;
+ 
+     if (I == 0) 
+         I = 16;
+     else I--;
+     R->i = I;
+ 
+     if (J == 0) 
+         J = 16 ;
+     else J--;
+     R->j = J;
+ 
+     if (R->haveRange) 
+         return  R->left +  dm1 * (double) k * R->width;
+     else
+         return dm1 * (double) k;
+ 
+ } 
+ 
+ 
+ 
+ 
+ /*--------------------------------------------------------------------
+                            PRIVATE METHODS
+   ----------------------------------------------------------------- */
+ 
+ static void initialize(Random R, int seed) 
+ {
+ 
+     int jseed, k0, k1, j0, j1, iloop;
+ 
+     dm1  = 1.0 / (double) m1; 
+ 
+     R->seed = seed;
+ 
+     if (seed < 0 ) seed = -seed;            /* seed = abs(seed) */  
+     jseed = (seed < m1 ? seed : m1);        /* jseed = min(seed, m1) */
+     if (jseed % 2 == 0) --jseed;
+     k0 = 9069 % m2;
+     k1 = 9069 / m2;
+     j0 = jseed % m2;
+     j1 = jseed / m2;
+     for (iloop = 0; iloop < 17; ++iloop) 
+     {
+         jseed = j0 * k0;
+         j1 = (jseed / m2 + j0 * k1 + j1 * k0) % (m2 / 2);
+         j0 = jseed % m2;
+         R->m[iloop] = j0 + m2 * j1;
+     }
+     R->i = 4;
+     R->j = 16;
+ 
+ }
+ 
+ double *RandomVector(int N, Random R)
+ {
+     int i;
+     double *x = (double *) malloc(sizeof(double)*N);
+ 
+     for (i=0; i<N; i++)
+         x[i] = Random_nextDouble(R);
+ 
+     return x;
+ }
+ 
+ 
+ double **RandomMatrix(int M, int N, Random R)
+ {
+     int i;
+     int j;
+ 
+     /* allocate matrix */
+ 
+     double **A = (double **) malloc(sizeof(double*)*M);
+ 
+     if (A == NULL) return NULL;
+ 
+     for (i=0; i<M; i++)
+     {
+         A[i] = (double *) malloc(sizeof(double)*N);
+         if (A[i] == NULL) 
+         {
+             free(A);
+             return NULL;
+         }
+         for (j=0; j<N; j++)
+             A[i][j] = Random_nextDouble(R);
+     }
+     return A;
+ }
+ 
+ 
+ 


Index: llvm-test/MultiSource/Benchmarks/SciMark2-C/Random.h
diff -c /dev/null llvm-test/MultiSource/Benchmarks/SciMark2-C/Random.h:1.1
*** /dev/null	Wed Oct 20 13:13:26 2004
--- llvm-test/MultiSource/Benchmarks/SciMark2-C/Random.h	Wed Oct 20 13:13:11 2004
***************
*** 0 ****
--- 1,18 ----
+ typedef struct
+ {
+   int m[17];                        
+   int seed;                             
+   int i;                                /* originally = 4 */
+   int j;                                /* originally =  16 */
+   int /*boolean*/ haveRange;            /* = false; */
+   double left;                          /*= 0.0; */
+   double right;                         /* = 1.0; */
+   double width;                         /* = 1.0; */
+ }
+ Random_struct, *Random;
+ 
+ Random new_Random_seed(int seed);
+ double Random_nextDouble(Random R);
+ void Random_delete(Random R);
+ double *RandomVector(int N, Random R);
+ double **RandomMatrix(int M, int N, Random R);


Index: llvm-test/MultiSource/Benchmarks/SciMark2-C/SOR.c
diff -c /dev/null llvm-test/MultiSource/Benchmarks/SciMark2-C/SOR.c:1.1
*** /dev/null	Wed Oct 20 13:13:26 2004
--- llvm-test/MultiSource/Benchmarks/SciMark2-C/SOR.c	Wed Oct 20 13:13:11 2004
***************
*** 0 ****
--- 1,43 ----
+ #include "SOR.h"
+ 
+     double SOR_num_flops(int M, int N, int num_iterations)
+     {
+         double Md = (double) M;
+         double Nd = (double) N;
+         double num_iterD = (double) num_iterations;
+ 
+         return (Md-1)*(Nd-1)*num_iterD*6.0;
+     }
+ 
+     void SOR_execute(int M, int N, double omega, double **G, int 
+             num_iterations)
+     {
+ 
+         double omega_over_four = omega * 0.25;
+         double one_minus_omega = 1.0 - omega;
+ 
+         /* update interior points */
+ 
+         int Mm1 = M-1;
+         int Nm1 = N-1; 
+         int p;
+         int i;
+         int j;
+         double *Gi;
+         double *Gim1;
+         double *Gip1;
+ 
+         for (p=0; p<num_iterations; p++)
+         {
+             for (i=1; i<Mm1; i++)
+             {
+                 Gi = G[i];
+                 Gim1 = G[i-1];
+                 Gip1 = G[i+1];
+                 for (j=1; j<Nm1; j++)
+                     Gi[j] = omega_over_four * (Gim1[j] + Gip1[j] + Gi[j-1] 
+                                 + Gi[j+1]) + one_minus_omega * Gi[j];
+             }
+         }
+     }
+             


Index: llvm-test/MultiSource/Benchmarks/SciMark2-C/SOR.h
diff -c /dev/null llvm-test/MultiSource/Benchmarks/SciMark2-C/SOR.h:1.1
*** /dev/null	Wed Oct 20 13:13:27 2004
--- llvm-test/MultiSource/Benchmarks/SciMark2-C/SOR.h	Wed Oct 20 13:13:11 2004
***************
*** 0 ****
--- 1,4 ----
+ 
+ double SOR_num_flops(int M, int N, int num_iterations);
+ void SOR_execute(int M, int N,double omega, double **G, int num_iterations);
+ 


Index: llvm-test/MultiSource/Benchmarks/SciMark2-C/SparseCompRow.c
diff -c /dev/null llvm-test/MultiSource/Benchmarks/SciMark2-C/SparseCompRow.c:1.1
*** /dev/null	Wed Oct 20 13:13:27 2004
--- llvm-test/MultiSource/Benchmarks/SciMark2-C/SparseCompRow.c	Wed Oct 20 13:13:11 2004
***************
*** 0 ****
--- 1,43 ----
+     /* multiple iterations used to make kernel have roughly
+         same granulairty as other Scimark kernels. */
+ 
+     double SparseCompRow_num_flops(int N, int nz, int num_iterations)
+     {
+         /* Note that if nz does not divide N evenly, then the
+            actual number of nonzeros used is adjusted slightly.
+         */
+         int actual_nz = (nz/N) * N;
+         return ((double)actual_nz) * 2.0 * ((double) num_iterations);
+     }
+ 
+ 
+     /* computes  a matrix-vector multiply with a sparse matrix
+         held in compress-row format.  If the size of the matrix
+         in MxN with nz nonzeros, then the val[] is the nz nonzeros,
+         with its ith entry in column col[i].  The integer vector row[]
+         is of size M+1 and row[i] points to the begining of the
+         ith row in col[].  
+     */
+ 
+     void SparseCompRow_matmult( int M, double *y, double *val, int *row,
+         int *col, double *x, int NUM_ITERATIONS)
+     {
+         int reps;
+         int r;
+         int i;
+ 
+         for (reps=0; reps<NUM_ITERATIONS; reps++)
+         {
+         
+             for (r=0; r<M; r++)
+             {
+                 double sum = 0.0; 
+                 int rowR = row[r];
+                 int rowRp1 = row[r+1];
+                 for (i=rowR; i<rowRp1; i++)
+                     sum += x[ col[i] ] * val[i];
+                 y[r] = sum;
+             }
+         }
+     }
+ 


Index: llvm-test/MultiSource/Benchmarks/SciMark2-C/SparseCompRow.h
diff -c /dev/null llvm-test/MultiSource/Benchmarks/SciMark2-C/SparseCompRow.h:1.1
*** /dev/null	Wed Oct 20 13:13:27 2004
--- llvm-test/MultiSource/Benchmarks/SciMark2-C/SparseCompRow.h	Wed Oct 20 13:13:11 2004
***************
*** 0 ****
--- 1,10 ----
+ 
+ #ifndef SPARSE_COMPROW_H
+ #define SPARSE_COMPROW_H
+ 
+ double SparseCompRow_num_flops(int N, int nz, int num_iterations);
+ 
+ void SparseCompRow_matmult( int M, double *y, double *val, int *row,
+         int *col, double *x, int NUM_ITERATIONS);
+ 
+ #endif


Index: llvm-test/MultiSource/Benchmarks/SciMark2-C/Stopwatch.c
diff -c /dev/null llvm-test/MultiSource/Benchmarks/SciMark2-C/Stopwatch.c:1.1
*** /dev/null	Wed Oct 20 13:13:27 2004
--- llvm-test/MultiSource/Benchmarks/SciMark2-C/Stopwatch.c	Wed Oct 20 13:13:11 2004
***************
*** 0 ****
--- 1,82 ----
+ /* #include <malloc.h> (brg) */
+ #include "Stopwatch.h"
+ 
+ double seconds()
+ {
+         return ((double) clock()) / (double) CLOCKS_PER_SEC; 
+ }
+ 
+ void Stopwtach_reset(Stopwatch Q)
+ {
+     Q->running = 0;         /* false */
+     Q->last_time = 0.0;
+     Q->total= 0.0;
+ }
+ 
+ 
+ Stopwatch new_Stopwatch(void)
+ {
+     Stopwatch S = (Stopwatch) malloc(sizeof(Stopwatch_struct));
+     if (S == NULL)
+         return NULL;
+ 
+     Stopwtach_reset(S);
+     return S;
+ }
+ 
+ void Stopwatch_delete(Stopwatch S)
+ {
+     if (S != NULL)
+         free(S);
+ }
+ 
+ 
+ /* Start resets the timer to 0.0; use resume for continued total */
+ 
+ void Stopwatch_start(Stopwatch Q)
+ {
+     if (! (Q->running)  )
+     {
+         Q->running = 1;  /* true */
+         Q->total = 0.0;
+         Q->last_time = seconds();
+     }
+ }
+    
+ /** 
+     Resume timing, after stopping.  (Does not wipe out
+         accumulated times.)
+ 
+ */
+ 
+ void Stopwatch_resume(Stopwatch Q)
+ {
+     if (!(Q->running))
+     {
+         Q-> last_time = seconds(); 
+         Q->running = 1;  /*true*/
+     }
+ }
+    
+ void Stopwatch_stop(Stopwatch Q)  
+ { 
+     if (Q->running) 
+     { 
+         Q->total += seconds() - Q->last_time; 
+         Q->running = 0;  /* false */
+     }
+ }
+   
+  
+ double Stopwatch_read(Stopwatch Q)
+ {  
+     
+     if (Q->running) 
+     {
+         double t = seconds();
+         Q->total += t - Q->last_time;
+         Q->last_time = t;
+     }
+     return Q->total;
+ }
+         


Index: llvm-test/MultiSource/Benchmarks/SciMark2-C/Stopwatch.h
diff -c /dev/null llvm-test/MultiSource/Benchmarks/SciMark2-C/Stopwatch.h:1.1
*** /dev/null	Wed Oct 20 13:13:27 2004
--- llvm-test/MultiSource/Benchmarks/SciMark2-C/Stopwatch.h	Wed Oct 20 13:13:11 2004
***************
*** 0 ****
--- 1,23 ----
+ 
+ #include <time.h>
+ 
+ typedef struct{
+     int running;        /* boolean */
+     double last_time;
+     double total;
+ 
+ } *Stopwatch, Stopwatch_struct;
+ 
+ 
+ 
+ double seconds();
+ 
+ void Stopwtach_reset(Stopwatch Q);
+ 
+ Stopwatch new_Stopwatch(void);
+ void Stopwatch_delete(Stopwatch S);
+ void Stopwatch_start(Stopwatch Q);
+ void Stopwatch_resume(Stopwatch Q);
+ void Stopwatch_stop(Stopwatch Q);
+ double Stopwatch_read(Stopwatch Q);
+         


Index: llvm-test/MultiSource/Benchmarks/SciMark2-C/array.c
diff -c /dev/null llvm-test/MultiSource/Benchmarks/SciMark2-C/array.c:1.1
*** /dev/null	Wed Oct 20 13:13:27 2004
--- llvm-test/MultiSource/Benchmarks/SciMark2-C/array.c	Wed Oct 20 13:13:11 2004
***************
*** 0 ****
--- 1,77 ----
+ /* #include <malloc.h> (brg) */
+ #include <stdio.h>
+ #include "array.h"
+ 
+ #ifndef NULL
+ #define NULL 0
+ #endif
+ 
+ 
+ double** new_Array2D_double(int M, int N)
+ {
+     int i=0;
+     int failed = 0;
+ 
+     double **A = (double**) malloc(sizeof(double*)*M);
+     if (A == NULL)
+         return NULL;
+ 
+     for (i=0; i<M; i++)
+     {
+         A[i] = (double*) malloc(N * sizeof(double));
+         if (A[i] == NULL)
+         {
+             failed = 1;
+             break;
+         }
+     }
+ 
+     /* if we didn't successfully allocate all rows of A      */
+     /* clean up any allocated memory (i.e. go back and free  */
+     /* previous rows) and return NULL                        */
+ 
+     if (failed)
+     {
+         i--;
+         for (; i<=0; i--)
+             free(A[i]);
+         free(A);
+         return NULL;
+     }
+     else
+         return A;
+ }
+ void Array2D_double_delete(int M, int N, double **A)
+ {
+     int i;
+     if (A == NULL) return;
+ 
+     for (i=0; i<M; i++)
+         free(A[i]);
+ 
+     free(A);
+ }
+ 
+ 
+   void Array2D_double_copy(int M, int N, double **B, double **A)
+   {
+ 
+         int remainder = N & 3;       /* N mod 4; */
+         int i=0;
+         int j=0;
+ 
+         for (i=0; i<M; i++)
+         {
+             double *Bi = B[i];
+             double *Ai = A[i];
+             for (j=0; j<remainder; j++)
+                 Bi[j] = Ai[j];
+             for (j=remainder; j<N; j+=4)
+             {
+                 Bi[j] = Ai[j];
+                 Bi[j+1] = Ai[j+1];
+                 Bi[j+2] = Ai[j+2];
+                 Bi[j+3] = Ai[j+3];
+             }
+         }
+   }


Index: llvm-test/MultiSource/Benchmarks/SciMark2-C/array.h
diff -c /dev/null llvm-test/MultiSource/Benchmarks/SciMark2-C/array.h:1.1
*** /dev/null	Wed Oct 20 13:13:27 2004
--- llvm-test/MultiSource/Benchmarks/SciMark2-C/array.h	Wed Oct 20 13:13:11 2004
***************
*** 0 ****
--- 1,9 ----
+ 
+ #ifndef ARRAY_H
+ #define ARRAY_H
+ 
+ double **new_Array2D_double(int M, int N);
+ void Array2D_double_delete(int M, int N, double **A);
+ void Array2D_double_copy(int M, int N, double **B, double **A);
+ 
+ #endif


Index: llvm-test/MultiSource/Benchmarks/SciMark2-C/constants.h
diff -c /dev/null llvm-test/MultiSource/Benchmarks/SciMark2-C/constants.h:1.1
*** /dev/null	Wed Oct 20 13:13:27 2004
--- llvm-test/MultiSource/Benchmarks/SciMark2-C/constants.h	Wed Oct 20 13:13:11 2004
***************
*** 0 ****
--- 1,35 ----
+ #ifndef CONSTANTS_H_
+ #define CONSTANTS_H_
+ 
+      const  double RESOLUTION_DEFAULT = 2.0;  /* secs (normally 2.0) */
+      const  int RANDOM_SEED = 101010;
+ 
+     /* default: small (cache-contained) problem sizes */
+ 
+      const  int FFT_SIZE = 1024;  /* must be a power of two */
+      const  int SOR_SIZE =100; /* NxN grid */
+      const  int SPARSE_SIZE_M = 1000;
+      const  int SPARSE_SIZE_nz = 5000;
+      const  int LU_SIZE = 100;
+ 
+     /* large (out-of-cache) problem sizes */
+ 
+      const  int LG_FFT_SIZE = 1048576;  /* must be a power of two */
+      const  int LG_SOR_SIZE =1000;  /*  NxN grid  */
+      const  int LG_SPARSE_SIZE_M = 100000;
+      const  int LG_SPARSE_SIZE_nz =1000000;
+      const  int LG_LU_SIZE = 1000;
+ 
+     /* tiny problem sizes (used to mainly to preload network classes     */
+     /*                     for applet, so that network download times    */
+     /*                     are factored out of benchmark.)               */
+     /*                                                                   */
+      const  int TINY_FFT_SIZE = 16;  /* must be a power of two */
+      const  int TINY_SOR_SIZE =10; /* NxN grid */
+      const  int TINY_SPARSE_SIZE_M = 10;
+      const  int TINY_SPARSE_SIZE_N = 10;
+      const  int TINY_SPARSE_SIZE_nz = 50;
+      const  int TINY_LU_SIZE = 10;
+ 
+ #endif
+ 


Index: llvm-test/MultiSource/Benchmarks/SciMark2-C/kernel.c
diff -c /dev/null llvm-test/MultiSource/Benchmarks/SciMark2-C/kernel.c:1.1
*** /dev/null	Wed Oct 20 13:13:27 2004
--- llvm-test/MultiSource/Benchmarks/SciMark2-C/kernel.c	Wed Oct 20 13:13:11 2004
***************
*** 0 ****
--- 1,231 ----
+ #include <stdio.h>
+ /* #include <malloc.h>  (brg) */
+ #include <stdlib.h>
+ #include "LU.h"
+ #include "FFT.h"
+ #include "SOR.h"
+ #include "MonteCarlo.h"
+ #include "LU.h"
+ #include "Random.h" 
+ #include "Stopwatch.h"  
+ #include "SparseCompRow.h"
+ #include "array.h"
+ 
+ 
+     double kernel_measureFFT(int N, double mintime, Random R)
+     {
+         /* initialize FFT data as complex (N real/img pairs) */
+ 
+         int twoN = 2*N;
+         double *x = RandomVector(twoN, R);
+         long cycles = 1;
+         Stopwatch Q = new_Stopwatch();
+         int i=0;
+         double result = 0.0;
+ 
+         while(1)
+         {
+             Stopwatch_start(Q);
+             for (i=0; i<cycles; i++)
+             {
+                 FFT_transform(twoN, x);     /* forward transform */
+                 FFT_inverse(twoN, x);       /* backward transform */
+             }
+             Stopwatch_stop(Q);
+             if (Stopwatch_read(Q) >= mintime)
+                 break;
+ 
+             cycles *= 2;
+ 
+         }
+         /* approx Mflops */
+ 
+         result = FFT_num_flops(N)*cycles/ Stopwatch_read(Q) * 1.0e-6;
+         Stopwatch_delete(Q);
+         free(x);
+         return result;
+     }
+ 
+     double kernel_measureSOR(int N, double min_time, Random R)
+     {
+         double **G = RandomMatrix(N, N, R);
+         double result = 0.0;
+ 
+         Stopwatch Q = new_Stopwatch();
+         int cycles=1;
+         while(1)
+         {
+             Stopwatch_start(Q);
+             SOR_execute(N, N, 1.25, G, cycles);
+             Stopwatch_stop(Q);
+ 
+             if (Stopwatch_read(Q) >= min_time) break;
+ 
+             cycles *= 2;
+         }
+         /* approx Mflops */
+ 
+         result = SOR_num_flops(N, N, cycles) / Stopwatch_read(Q) * 1.0e-6;
+         Stopwatch_delete(Q);
+         Array2D_double_delete(N, N, G);
+         return result;
+ 
+     }
+ 
+ 
+ 
+     double kernel_measureMonteCarlo(double min_time, Random R)
+     {
+         double result = 0.0;
+         Stopwatch Q = new_Stopwatch();
+ 
+         int cycles=1;
+         while(1)
+         {
+             Stopwatch_start(Q);
+             MonteCarlo_integrate(cycles);
+             Stopwatch_stop(Q);
+             if (Stopwatch_read(Q) >= min_time) break;
+ 
+             cycles *= 2;
+         }
+         /* approx Mflops */
+         result = MonteCarlo_num_flops(cycles) / Stopwatch_read(Q) * 1.0e-6;
+         Stopwatch_delete(Q);
+         return result;
+     }
+ 
+ 
+     double kernel_measureSparseMatMult(int N, int nz, 
+             double min_time, Random R)
+     {
+         /* initialize vector multipliers and storage for result */
+         /* y = A*y;  */
+ 
+         double *x = RandomVector(N, R);
+         double *y = (double*) malloc(sizeof(double)*N);
+ 
+         double result = 0.0;
+ 
+ #if 0
+         // initialize square sparse matrix
+         //
+         // for this test, we create a sparse matrix with M/nz nonzeros
+         // per row, with spaced-out evenly between the begining of the
+         // row to the main diagonal.  Thus, the resulting pattern looks
+         // like
+         //             +-----------------+
+         //             +*                +
+         //             +***              +
+         //             +* * *            +
+         //             +** *  *          +
+         //             +**  *   *        +
+         //             +* *   *   *      +
+         //             +*  *   *    *    +
+         //             +*   *    *    *  + 
+         //             +-----------------+
+         //
+         // (as best reproducible with integer artihmetic)
+         // Note that the first nr rows will have elements past
+         // the diagonal.
+ #endif
+ 
+         int nr = nz/N;      /* average number of nonzeros per row  */
+         int anz = nr *N;    /* _actual_ number of nonzeros         */
+ 
+             
+         double *val = RandomVector(anz, R);
+         int *col = (int*) malloc(sizeof(int)*nz);
+         int *row = (int*) malloc(sizeof(int)*(N+1));
+         int r=0;
+         int cycles=1;
+ 
+         Stopwatch Q = new_Stopwatch();
+ 
+         row[0] = 0; 
+         for (r=0; r<N; r++)
+         {
+             /* initialize elements for row r */
+ 
+             int rowr = row[r];
+             int step = r/ nr;
+             int i=0;
+ 
+             row[r+1] = rowr + nr;
+             if (step < 1) step = 1;   /* take at least unit steps */
+ 
+ 
+             for (i=0; i<nr; i++)
+                 col[rowr+i] = i*step;
+                 
+         }
+ 
+ 
+         while(1)
+         {
+             Stopwatch_start(Q);
+             SparseCompRow_matmult(N, y, val, row, col, x, cycles);
+             Stopwatch_stop(Q);
+             if (Stopwatch_read(Q) >= min_time) break;
+ 
+             cycles *= 2;
+         }
+         /* approx Mflops */
+         result = SparseCompRow_num_flops(N, nz, cycles) / 
+                         Stopwatch_read(Q) * 1.0e-6;
+ 
+         Stopwatch_delete(Q);
+         free(row);
+         free(col);
+         free(val);
+         free(y);
+         free(x);
+ 
+         return result;
+     }
+ 
+ 
+     double kernel_measureLU(int N, double min_time, Random R)
+     {
+ 
+         double **A = NULL;
+         double **lu = NULL; 
+         int *pivot = NULL;
+ 
+     
+ 
+         Stopwatch Q = new_Stopwatch();
+         double result = 0.0;
+         int i=0;
+         int cycles=1;
+ 
+         if ((A = RandomMatrix(N, N,  R)) == NULL) exit(1);
+         if ((lu = new_Array2D_double(N, N)) == NULL) exit(1);
+         if ((pivot = (int *) malloc(N * sizeof(int))) == NULL) exit(1);
+ 
+ 
+         while(1)
+         {
+             Stopwatch_start(Q);
+             for (i=0; i<cycles; i++)
+             {
+                 Array2D_double_copy(N, N, lu, A);
+                 LU_factor(N, N, lu, pivot);
+             }
+             Stopwatch_stop(Q);
+             if (Stopwatch_read(Q) >= min_time) break;
+ 
+             cycles *= 2;
+         }
+         /* approx Mflops */
+         result = LU_num_flops(N) * cycles / Stopwatch_read(Q) * 1.0e-6;
+ 
+         Stopwatch_delete(Q);
+         free(pivot); 
+         Array2D_double_delete(N, N, lu); 
+         Array2D_double_delete(N, N, A);
+ 
+         return result;
+ 
+     }
+ 


Index: llvm-test/MultiSource/Benchmarks/SciMark2-C/kernel.h
diff -c /dev/null llvm-test/MultiSource/Benchmarks/SciMark2-C/kernel.h:1.1
*** /dev/null	Wed Oct 20 13:13:27 2004
--- llvm-test/MultiSource/Benchmarks/SciMark2-C/kernel.h	Wed Oct 20 13:13:11 2004
***************
*** 0 ****
--- 1,11 ----
+ #ifndef KERNEL_H
+ #define KERNEL_H
+ 
+ double kernel_measureFFT( int FFT_size, double min_time, Random R);
+ double kernel_measureSOR( int SOR_size, double min_time, Random R);
+ double kernel_measureMonteCarlo( double min_time, Random R);
+ double kernel_measureSparseMatMult(int Sparse_size_N,
+     int Sparse_size_nz, double min_time, Random R);
+ double kernel_measureLU( int LU_size, double min_time, Random R);
+ 
+ #endif


Index: llvm-test/MultiSource/Benchmarks/SciMark2-C/scimark2.c
diff -c /dev/null llvm-test/MultiSource/Benchmarks/SciMark2-C/scimark2.c:1.1
*** /dev/null	Wed Oct 20 13:13:27 2004
--- llvm-test/MultiSource/Benchmarks/SciMark2-C/scimark2.c	Wed Oct 20 13:13:11 2004
***************
*** 0 ****
--- 1,98 ----
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+ 
+ #include "Random.h"
+ #include "kernel.h"
+ #include "constants.h"
+ 
+ void print_banner(void);
+ 
+ int main(int argc, char *argv[])
+ {
+         /* default to the (small) cache-contained version */
+ 
+         double min_time = RESOLUTION_DEFAULT;
+ 
+         int FFT_size = FFT_SIZE;
+         int SOR_size =  SOR_SIZE;
+         int Sparse_size_M = SPARSE_SIZE_M;
+         int Sparse_size_nz = SPARSE_SIZE_nz;
+         int LU_size = LU_SIZE;
+ 
+ 
+         /* run the benchmark */
+ 
+         double res[6] = {0.0};
+         Random R = new_Random_seed(RANDOM_SEED);
+ 
+ 
+         if (argc > 1)
+         {
+ 			int current_arg = 1;
+ 
+ 			if (strcmp(argv[1], "-help")==0  ||
+ 					strcmp(argv[1], "-h") == 0)
+ 			{
+ 				fprintf(stderr, "Usage: [-large] [minimum_time]\n");
+ 				exit(0);
+ 			}
+ 
+ 			if (strcmp(argv[1], "-large")==0)
+ 			{
+ 				FFT_size = LG_FFT_SIZE;
+ 				SOR_size = LG_SOR_SIZE;
+ 				Sparse_size_M = LG_SPARSE_SIZE_M;
+ 				Sparse_size_nz = LG_SPARSE_SIZE_nz;
+ 				LU_size = LG_LU_SIZE;
+ 
+ 				current_arg++;
+ 			}
+ 
+ 			if (current_arg < argc)
+ 			{
+ 				min_time = atof(argv[current_arg]);
+ 			}
+ 			
+         }
+ 
+ 	
+ 		print_banner();
+ 		printf("Using %10.2f seconds min time per kenel.\n", min_time);
+ 
+         res[1] = kernel_measureFFT( FFT_size, min_time, R);   
+         res[2] = kernel_measureSOR( SOR_size, min_time, R);   
+         res[3] = kernel_measureMonteCarlo(min_time, R); 
+         res[4] = kernel_measureSparseMatMult( Sparse_size_M, 
+                 Sparse_size_nz, min_time, R);           
+         res[5] = kernel_measureLU( LU_size, min_time, R);  
+ 
+ 
+ 
+         res[0] = (res[1] + res[2] + res[3] + res[4] + res[5]) / 5;
+ 
+         /* print out results  */
+         printf("Composite Score:        %8.2f\n" ,res[0]);
+         printf("FFT             Mflops: %8.2f    (N=%d)\n", res[1], FFT_size);
+         printf("SOR             Mflops: %8.2f    (%d x %d)\n", 		
+ 				res[2], SOR_size, SOR_size);
+         printf("MonteCarlo:     Mflops: %8.2f\n", res[3]);
+         printf("Sparse matmult  Mflops: %8.2f    (N=%d, nz=%d)\n", res[4], 
+ 					Sparse_size_M, Sparse_size_nz);
+         printf("LU              Mflops: %8.2f    (M=%d, N=%d)\n", res[5],
+ 				LU_size, LU_size);
+ 
+ 
+         Random_delete(R);
+ 
+         return 0;
+   
+ }
+ 
+ void print_banner()
+ {
+  printf("**                                                              **\n");
+  printf("** SciMark2 Numeric Benchmark, see http://math.nist.gov/scimark **\n");
+  printf("** for details. (Results can be submitted to pozo at nist.gov)     **\n");
+  printf("**                                                              **\n");
+ }


Index: llvm-test/MultiSource/Benchmarks/SciMark2-C/scimark2.h
diff -c /dev/null llvm-test/MultiSource/Benchmarks/SciMark2-C/scimark2.h:1.1
*** /dev/null	Wed Oct 20 13:13:27 2004
--- llvm-test/MultiSource/Benchmarks/SciMark2-C/scimark2.h	Wed Oct 20 13:13:11 2004
***************
*** 0 ****
--- 1,22 ----
+ 
+ #ifndef SCIMARK2_H
+ #define SCIMARK2_H
+ 
+ #define VERSION 2.0
+ 
+ #ifndef NULL 
+ #define NULL 0
+ #endif
+ 
+ #ifndef TRUE
+ #define TRUE 1
+ #endif
+ 
+ #ifndef FALSE
+ #define FALSE 0
+ #endif
+ 
+ 
+ 
+ #endif
+ 






More information about the llvm-commits mailing list