[LLVMdev] Another compiler shootout
Chris Lattner
clattner at apple.com
Mon Dec 15 22:08:12 PST 2008
On Dec 15, 2008, at 8:26 PM, Jon Harrop wrote:
> On Tuesday 16 December 2008 01:03:36 Evan Cheng wrote:
>> FYI. http://leonardo-m.livejournal.com/73732.html
>>
>> If anyone is motivated, please file bugs for the losing cases. Also,
>> it might make sense to incorporate the tests into our nightly tester
>> test suite.
>
> FWIW, I just ported my ray tracer benchmark to C and found that llvm-gcc
> gives much worse performance than gcc on x86 but not on x86-64 on an
> Opteron:
>
> 2.1GHz Opteron
>
> 32-bit
> gcc 4.3.2: 5.60s (gcc -Wall -O3 -lm ray.c -o ray)
> llvm-gcc 4.2.1: 9.00s (llvm-gcc -O3 -march=opteron -msse2 -lm ray.c -o ray)
>
> 64-bit
> gcc 4.3.2: 4.18s (gcc -Wall -O3 -lm ray.c -o ray)
> llvm-gcc 4.2.1: 5.00s (llvm-gcc -O3 -march=opteron -msse2 -lm ray.c -o ray)
>
> Note that the LLVM-generated code is 60% slower than GCC's in the
> first case.
Wow that's bad :), nice relatively small testcase too.
Can you please file a bugzilla report with this .c file, and the
output of the above compilations in -S mode (so attach the .s file
from llvm and gcc in 32/64 bit modes)?
It would also be useful to attach the -emit-llvm -S output from the
llvm-gcc compiles.
> I am unfamiliar with x86 assembler but I believe the problem is that LLVM is
> calling a function for fsqrt rather than using the x86 op-code. Should I be
> passing some command line arguments or using a newer llvm-gcc to get it to
> emit fsqrt or is that not yet implemented?
That should be very easy to add. Thanks!
-Chris
>
>
> Benchmark was:
>
> time ./ray 9 512 >image.pgm
>
> Compile times go down from 0.36s to 0.13s on x86 and 0.35s to 0.19s
> on x86-64
> as expected.
>
> Here's the code:
>
> #include <float.h>
> #include <stdio.h>
> #include <stdlib.h>
> #include <math.h>
>
> #define real float
> #define epsilon FLT_EPSILON
> //#define real double
> //#define epsilon DBL_EPSILON
>
> real delta, INFINITY;
>
> typedef struct { real x, y, z; } Vec;
> Vec vec(real x, real y, real z) { Vec r; r.x=x; r.y=y; r.z=z; return
> r; }
> Vec add(const Vec a, const Vec b) { return vec(a.x+b.x, a.y+b.y, a.z
> +b.z); }
> Vec sub(const Vec a, const Vec b) { return vec(a.x-b.x, a.y-b.y, a.z-
> b.z); }
> Vec scale(real a, const Vec b) { return vec(a*b.x, a*b.y, a*b.z); }
> real dot(const Vec a, const Vec b) { return a.x*b.x + a.y*b.y +
> a.z*b.z; }
> Vec unitise(const Vec a) { return scale((1.0 / sqrt(dot(a, a))), a); }
>
> struct Scene {
> Vec center;
> real radius;
> struct Scene *child;
> };
>
> real ray_sphere(Vec o, Vec d, Vec c, real r) {
> Vec v = sub(c, o);
> real b = dot(v, d), disc = b*b - dot(v, v) + r*r, t1, t2;
> if (disc < 0.0) return INFINITY;
> disc = sqrt(disc);
> t2 = b + disc;
> if (t2 < 0.0) return INFINITY;
> t1 = b - disc;
> return (t1 > 0.0 ? t1 : t2);
> }
>
> void intersect(Vec o, Vec d, real *lambda, struct Scene **t, struct
> Scene
> *scene) {
> real lambda2 = ray_sphere(o, d, scene->center, scene->radius);
> if (lambda2 < *lambda) {
> if (scene->child) {
> int i;
> for (i=0; i<5; ++i)
> intersect(o, d, lambda, t, &scene->child[i]);
> } else {
> *lambda = lambda2;
> *t = scene;
> }
> }
> }
>
> Vec neglight;
>
> real ray_trace(Vec o, Vec d, struct Scene scene) {
> real lambda = INFINITY;
> struct Scene *t = NULL;
> intersect(o, d, &lambda, &t, &scene);
> if (lambda == INFINITY) return 0.0;
> {
> Vec p = add(o, scale(lambda, d));
> Vec normal = unitise(sub(p, t->center));
> real g = dot(normal, neglight);
> if (g <= 0.0) return 0.0;
> p = add(p, scale(delta, normal));
> lambda = INFINITY;
> intersect(p, neglight, &lambda, &t, &scene);
> return (lambda < INFINITY ? 0.0 : g);
> }
> }
>
> struct Scene create(int level, Vec c, real r) {
> struct Scene scene;
> scene.center = c;
> if (level == 1) {
> scene.radius = r;
> scene.child = NULL;
> } else {
> real rn = 3*r/sqrt(12);
> scene.radius = 3*r;
> scene.child = (struct Scene *)malloc(5*sizeof(struct Scene));
> scene.child[0] = create(1, c, r);
> scene.child[1] = create(level-1, add(c, scale(rn, vec(-1, 1,
> -1))), r/2);
> scene.child[2] = create(level-1, add(c, scale(rn, vec( 1, 1,
> -1))), r/2);
> scene.child[3] = create(level-1, add(c, scale(rn, vec(-1, 1,
> 1))), r/2);
> scene.child[4] = create(level-1, add(c, scale(rn, vec( 1, 1,
> 1))), r/2);
> }
> return scene;
> }
>
> int main(int argc, char *argv[]) {
> struct Scene scene;
> int level, n, ss=4, x, y;
> level = (argc==3 ? atoi(argv[1]) : 9);
> n = (argc==3 ? atoi(argv[2]) : 512);
> delta = sqrt(epsilon);
> INFINITY = 1.0 / 0.0;
> neglight = unitise(vec(1, 3, -2));
> scene = create(level, vec(0, -1, 0), 1);
> printf("P5\n%d %d\n255\n", n, n);
> for (y=n-1; y>=0; --y)
> for (x=0; x<n; ++x) {
> real g=0.0;
> int dx, dy;
> for (dx=0; dx<ss; ++dx)
> for (dy=0; dy<ss; ++dy) {
> Vec d=unitise(vec(x+dx*1./ss-n/2., y+dy*1./ss-n/2., n));
> g += ray_trace(vec(0, 0, -4), d, scene);
> }
> printf("%c", (char)(0.5 + 255.0 * g / (ss*ss)));
> }
> return 0;
> }
>
> --
> Dr Jon Harrop, Flying Frog Consultancy Ltd.
> http://www.ffconsultancy.com/?e
> _______________________________________________
> LLVM Developers mailing list
> LLVMdev at cs.uiuc.edu http://llvm.cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev
More information about the llvm-dev
mailing list