[LLVMdev] Another compiler shootout

Chris Lattner clattner at apple.com
Mon Dec 15 22:08:12 PST 2008


On Dec 15, 2008, at 8:26 PM, Jon Harrop wrote:

> On Tuesday 16 December 2008 01:03:36 Evan Cheng wrote:
>> FYI. http://leonardo-m.livejournal.com/73732.html
>>
>> If anyone is motivated, please file bugs for the losing cases. Also,
>> it might make sense to incorporate the tests into our nightly tester
>> test suite.
>
> FWIW, I just ported my ray tracer benchmark to C and found that
> llvm-gcc gives much worse performance than gcc on x86 but not on
> x86-64 on an Opteron:
>
> 2.1GHz Opteron
>
> 32-bit
>     gcc 4.3.2: 5.60s (gcc -Wall -O3 -lm ray.c -o ray)
> llvm-gcc 4.2.1: 9.00s (llvm-gcc -O3 -march=opteron -msse2 -lm ray.c -o ray)
>
> 64-bit
>     gcc 4.3.2: 4.18s (gcc -Wall -O3 -lm ray.c -o ray)
> llvm-gcc 4.2.1: 5.00s (llvm-gcc -O3 -march=opteron -msse2 -lm ray.c -o ray)
>
> Note that the LLVM-generated code is 60% slower than GCC's in the  
> first case.

Wow that's bad :), nice relatively small testcase too.

Can you please file a bugzilla report with this .c file, and the  
output of the above compilations in -S mode (so attach the .s file  
from llvm and gcc in 32/64 bit modes)?

It would also be useful to attach the -emit-llvm -S output from the  
llvm-gcc compiles.

> I am unfamiliar with x86 assembler but I believe the problem is that
> LLVM is calling a function for fsqrt rather than using the x86 op-code.
> Should I be passing some command line arguments or using a newer
> llvm-gcc to get it to emit fsqrt or is that not yet implemented?

That should be very easy to add.  Thanks!

-Chris

>
>
> Benchmark was:
>
>  time ./ray 9 512 >image.pgm
>
> Compile times go down from 0.36s to 0.13s on x86 and 0.35s to 0.19s
> on x86-64 as expected.
>
> Here's the code:
>
> #include <float.h>
> #include <stdio.h>
> #include <stdlib.h>
> #include <math.h>
>
> #define real float
> #define epsilon FLT_EPSILON
> //#define real double
> //#define epsilon DBL_EPSILON
>
> real delta, INFINITY;
>
> typedef struct { real x, y, z; } Vec;
> Vec vec(real x, real y, real z) { Vec r; r.x=x; r.y=y; r.z=z; return  
> r; }
> Vec add(const Vec a, const Vec b) { return vec(a.x+b.x, a.y+b.y, a.z 
> +b.z); }
> Vec sub(const Vec a, const Vec b) { return vec(a.x-b.x, a.y-b.y, a.z- 
> b.z); }
> Vec scale(real a, const Vec b) { return vec(a*b.x, a*b.y, a*b.z); }
> real dot(const Vec a, const Vec b) { return a.x*b.x + a.y*b.y +  
> a.z*b.z; }
> Vec unitise(const Vec a) { return scale((1.0 / sqrt(dot(a, a))), a); }
>
> struct Scene {
>  Vec center;
>  real radius;
>  struct Scene *child;
> };
>
> real ray_sphere(Vec o, Vec d, Vec c, real r) {
>  Vec v = sub(c, o);
>  real b = dot(v, d), disc = b*b - dot(v, v) + r*r, t1, t2;
>  if (disc < 0.0) return INFINITY;
>  disc = sqrt(disc);
>  t2 = b + disc;
>  if (t2 < 0.0) return INFINITY;
>  t1 = b - disc;
>  return (t1 > 0.0 ? t1 : t2);
> }
>
> void intersect(Vec o, Vec d, real *lambda, struct Scene **t, struct  
> Scene
> *scene) {
>  real lambda2 = ray_sphere(o, d, scene->center, scene->radius);
>  if (lambda2 < *lambda) {
>    if (scene->child) {
>      int i;
>      for (i=0; i<5; ++i)
>        intersect(o, d, lambda, t, &scene->child[i]);
>    } else {
>      *lambda = lambda2;
>      *t = scene;
>    }
>  }
> }
>
> Vec neglight;
>
> real ray_trace(Vec o, Vec d, struct Scene scene) {
>  real lambda = INFINITY;
>  struct Scene *t = NULL;
>  intersect(o, d, &lambda, &t, &scene);
>  if (lambda == INFINITY) return 0.0;
>  {
>    Vec p = add(o, scale(lambda, d));
>    Vec normal = unitise(sub(p, t->center));
>    real g = dot(normal, neglight);
>    if (g <= 0.0) return 0.0;
>    p = add(p, scale(delta, normal));
>    lambda = INFINITY;
>    intersect(p, neglight, &lambda, &t, &scene);
>    return (lambda < INFINITY ? 0.0 : g);
>  }
> }
>
> struct Scene create(int level, Vec c, real r) {
>  struct Scene scene;
>  scene.center = c;
>  if (level == 1) {
>    scene.radius = r;
>    scene.child = NULL;
>  } else {
>    real rn = 3*r/sqrt(12);
>    scene.radius = 3*r;
>    scene.child = (struct Scene *)malloc(5*sizeof(struct Scene));
>    scene.child[0] = create(1, c, r);
>    scene.child[1] = create(level-1, add(c, scale(rn, vec(-1, 1,  
> -1))), r/2);
>    scene.child[2] = create(level-1, add(c, scale(rn, vec( 1, 1,  
> -1))), r/2);
>    scene.child[3] = create(level-1, add(c, scale(rn, vec(-1, 1,   
> 1))), r/2);
>    scene.child[4] = create(level-1, add(c, scale(rn, vec( 1, 1,   
> 1))), r/2);
>  }
>  return scene;
> }
>
> int main(int argc, char *argv[]) {
>  struct Scene scene;
>  int level, n, ss=4, x, y;
>  level = (argc==3 ? atoi(argv[1]) : 9);
>  n = (argc==3 ? atoi(argv[2]) : 512);
>  delta = sqrt(epsilon);
>  INFINITY = 1.0 / 0.0;
>  neglight = unitise(vec(1, 3, -2));
>  scene = create(level, vec(0, -1, 0), 1);
>  printf("P5\n%d %d\n255\n", n, n);
>  for (y=n-1; y>=0; --y)
>    for (x=0; x<n; ++x) {
>      real g=0.0;
>      int dx, dy;
>      for (dx=0; dx<ss; ++dx)
> 	for (dy=0; dy<ss; ++dy) {
> 	  Vec d=unitise(vec(x+dx*1./ss-n/2., y+dy*1./ss-n/2., n));
> 	  g += ray_trace(vec(0, 0, -4), d, scene);
> 	}
>      printf("%c", (char)(0.5 + 255.0 * g / (ss*ss)));
>    }
>  return 0;
> }
>
> -- 
> Dr Jon Harrop, Flying Frog Consultancy Ltd.
> http://www.ffconsultancy.com/?e
> _______________________________________________
> LLVM Developers mailing list
> LLVMdev at cs.uiuc.edu         http://llvm.cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev




More information about the llvm-dev mailing list