[LLVMdev] Another compiler shootout
Jon Harrop
jon at ffconsultancy.com
Mon Dec 15 20:26:40 PST 2008
On Tuesday 16 December 2008 01:03:36 Evan Cheng wrote:
> FYI. http://leonardo-m.livejournal.com/73732.html
>
> If anyone is motivated, please file bugs for the losing cases. Also,
> it might make sense to incorporate the tests into our nightly tester
> test suite.
FWIW, I just ported my ray tracer benchmark to C and found that, on an Opteron,
llvm-gcc gives much worse performance than gcc for 32-bit x86 but not for x86-64:
2.1GHz Opteron
32-bit
gcc 4.3.2: 5.60s (gcc -Wall -O3 -lm ray.c -o ray)
llvm-gcc 4.2.1: 9.00s (llvm-gcc -O3 -march=opteron -msse2 -lm ray.c -o ray)
64-bit
gcc 4.3.2: 4.18s (gcc -Wall -O3 -lm ray.c -o ray)
llvm-gcc 4.2.1: 5.00s (llvm-gcc -O3 -march=opteron -msse2 -lm ray.c -o ray)
Note that the LLVM-generated code is 60% slower than GCC's in the first case.
I am unfamiliar with x86 assembler but I believe the problem is that LLVM is
calling a function for fsqrt rather than using the x86 op-code. Should I be
passing some command line arguments or using a newer llvm-gcc to get it to
emit fsqrt or is that not yet implemented?
Benchmark was:
time ./ray 9 512 >image.pgm
Compile times go down from 0.36s (gcc) to 0.13s (llvm-gcc) on x86 and from
0.35s to 0.19s on x86-64, as expected.
Here's the code:
#include <float.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
/*
 * Floating-point configuration.  `real` is the scalar type used throughout
 * and `epsilon` its machine epsilon; to switch to double precision, comment
 * out the first pair of definitions and enable the second.
 */
#define real float
#define epsilon FLT_EPSILON
//#define real double
//#define epsilon DBL_EPSILON

/*
 * Since C99, <math.h> defines INFINITY as a macro, so declaring a variable
 * with that name does not compile in C99/C11 mode.  The rest of the program
 * assigns to and reads INFINITY as a global of type `real`, so the library
 * macro is dropped here to keep the name usable as an ordinary object.
 */
#ifdef INFINITY
#undef INFINITY
#endif

/*
 * delta:    offset applied along the surface normal before casting a shadow
 *           ray; main() sets it to sqrt(epsilon).
 * INFINITY: "no intersection" sentinel; main() sets it to 1.0 / 0.0.
 */
real delta, INFINITY;
/* Three-component vector of `real`, used for both points and directions. */
typedef struct {
    real x;
    real y;
    real z;
} Vec;
/* Build a Vec from its three components. */
Vec vec(real x, real y, real z) {
    Vec result;

    result.x = x;
    result.y = y;
    result.z = z;
    return result;
}
/* Component-wise sum a + b. */
Vec add(const Vec a, const Vec b) {
    Vec sum;

    sum.x = a.x + b.x;
    sum.y = a.y + b.y;
    sum.z = a.z + b.z;
    return sum;
}
/* Component-wise difference a - b. */
Vec sub(const Vec a, const Vec b) {
    Vec diff;

    diff.x = a.x - b.x;
    diff.y = a.y - b.y;
    diff.z = a.z - b.z;
    return diff;
}
/* Multiply every component of b by the scalar a. */
Vec scale(real a, const Vec b) {
    Vec scaled;

    scaled.x = a * b.x;
    scaled.y = a * b.y;
    scaled.z = a * b.z;
    return scaled;
}
/* Dot (inner) product of a and b. */
real dot(const Vec a, const Vec b) {
    const real product = a.x*b.x + a.y*b.y + a.z*b.z;

    return product;
}
Vec unitise(const Vec a) { return scale((1.0 / sqrt(dot(a, a))), a); }
/*
 * One node of the scene hierarchy: a sphere given by center and radius.
 * When child is NULL the node is a leaf; otherwise child points to an
 * array of 5 sub-nodes (allocated in create()) and intersect() only
 * descends into them when the ray hits this node's sphere.
 */
struct Scene {
Vec center;
real radius;
struct Scene *child;
};
/*
 * Intersect the ray o + t*d with the sphere of center c and radius r.
 *
 * Returns the smallest positive ray parameter t at which the ray meets the
 * sphere; if the ray starts inside the sphere this is the exit point.
 * Returns the global INFINITY when the ray misses the sphere or the sphere
 * lies entirely behind the origin.  The quadratic is solved without
 * dividing by dot(d, d), so d is expected to be unit length (the callers in
 * this file pass unitised directions).
 */
real ray_sphere(Vec o, Vec d, Vec c, real r) {
    const Vec to_center = sub(c, o);
    const real along = dot(to_center, d);
    real disc = along*along - dot(to_center, to_center) + r*r;
    real near_hit, far_hit;

    if (disc < 0.0) {
        return INFINITY;
    }
    disc = sqrt(disc);

    far_hit = along + disc;
    if (far_hit < 0.0) {
        return INFINITY;
    }

    near_hit = along - disc;
    if (near_hit > 0.0) {
        return near_hit;
    }
    return far_hit;
}
/*
 * Find the closest leaf sphere hit by the ray o + t*d within `scene`.
 *
 * *lambda holds the best ray parameter found so far and *t the leaf that
 * produced it; both are updated only when a strictly closer hit is found.
 * A node whose sphere is not hit closer than *lambda is skipped together
 * with everything beneath it.  Nodes with children recurse into all 5 of
 * them; nodes without children are leaves and record the hit.
 */
void intersect(Vec o, Vec d, real *lambda, struct Scene **t, struct Scene
*scene) {
    const real hit = ray_sphere(o, d, scene->center, scene->radius);
    int k;

    if (!(hit < *lambda)) {
        return;
    }

    if (!scene->child) {
        *lambda = hit;
        *t = scene;
        return;
    }

    for (k = 0; k < 5; ++k) {
        intersect(o, d, lambda, t, scene->child + k);
    }
}
/*
 * Direction used for lighting: ray_trace() dots surface normals with it and
 * casts shadow rays along it.  main() sets it to a unit vector before any
 * ray is traced.
 */
Vec neglight;
/*
 * Trace the ray o + t*d through `scene` and return the brightness seen
 * along it.
 *
 * Returns 0 when the ray hits nothing, when the hit surface faces away from
 * the light (dot(normal, neglight) <= 0), or when a shadow ray cast from
 * the hit point along neglight hits any sphere.  Otherwise returns
 * dot(normal, neglight).  The shadow ray starts `delta` along the normal
 * from the hit point, so it begins just off the surface that was hit.
 */
real ray_trace(Vec o, Vec d, struct Scene scene) {
    real nearest = INFINITY;
    struct Scene *hit = NULL;
    Vec point, normal;
    real brightness;

    intersect(o, d, &nearest, &hit, &scene);
    if (nearest == INFINITY) {
        return 0.0;
    }

    point = add(o, scale(nearest, d));
    normal = unitise(sub(point, hit->center));
    brightness = dot(normal, neglight);
    if (brightness <= 0.0) {
        return 0.0;
    }

    point = add(point, scale(delta, normal));
    nearest = INFINITY;
    intersect(point, neglight, &nearest, &hit, &scene);
    if (nearest < INFINITY) {
        return 0.0;
    }
    return brightness;
}
/*
 * Build the scene hierarchy for a sphere of radius r centered at c.
 *
 * level <= 1 yields a leaf: the sphere itself, with no children.  Any
 * higher level yields a bounding node of radius 3*r with 5 children: the
 * leaf sphere (c, r), plus four sub-scenes of level-1 and radius r/2 whose
 * centers are offset from c by rn*(+-1, 1, +-1), where rn = 3*r/sqrt(12).
 *
 * The children array is heap-allocated and is not freed anywhere in this
 * function.  The program exits with a message on allocation failure.
 */
struct Scene create(int level, Vec c, real r) {
    struct Scene scene;

    scene.center = c;
    /* "<= 1" rather than "== 1": a level of 0 or below would otherwise
       recurse without ever reaching the leaf case. */
    if (level <= 1) {
        scene.radius = r;
        scene.child = NULL;
    } else {
        real rn = 3*r/sqrt(12);

        scene.radius = 3*r;
        scene.child = malloc(5 * sizeof *scene.child);
        if (scene.child == NULL) {
            fprintf(stderr, "create: out of memory building the scene\n");
            exit(EXIT_FAILURE);
        }
        scene.child[0] = create(1, c, r);
        scene.child[1] = create(level-1, add(c, scale(rn, vec(-1, 1, -1))), r/2);
        scene.child[2] = create(level-1, add(c, scale(rn, vec( 1, 1, -1))), r/2);
        scene.child[3] = create(level-1, add(c, scale(rn, vec(-1, 1, 1))), r/2);
        scene.child[4] = create(level-1, add(c, scale(rn, vec( 1, 1, 1))), r/2);
    }
    return scene;
}
/*
 * Render the scene as a binary PGM (P5) image on stdout.
 *
 * Usage: ray [level n]
 *   level  depth of the scene hierarchy passed to create() (default 9)
 *   n      width and height of the image in pixels (default 512)
 * The arguments are only read when exactly two are given; with any other
 * argument count the defaults are used.  Each pixel averages ss*ss samples.
 * Returns EXIT_FAILURE if level or n is not a positive integer.
 */
int main(int argc, char *argv[]) {
    struct Scene scene;
    int level, n, ss=4, x, y;

    level = (argc==3 ? atoi(argv[1]) : 9);
    n = (argc==3 ? atoi(argv[2]) : 512);
    /* atoi() yields 0 for non-numeric text; reject that and negative values
       instead of building a degenerate scene or image. */
    if (level < 1 || n < 1) {
        fprintf(stderr, "usage: %s [level n]  (level >= 1, n >= 1)\n", argv[0]);
        return EXIT_FAILURE;
    }

    delta = sqrt(epsilon);
    INFINITY = 1.0 / 0.0;
    neglight = unitise(vec(1, 3, -2));
    scene = create(level, vec(0, -1, 0), 1);

    printf("P5\n%d %d\n255\n", n, n);
    for (y=n-1; y>=0; --y) {
        for (x=0; x<n; ++x) {
            real g=0.0;
            int dx, dy;

            for (dx=0; dx<ss; ++dx) {
                for (dy=0; dy<ss; ++dy) {
                    Vec d=unitise(vec(x+dx*1./ss-n/2., y+dy*1./ss-n/2., n));
                    g += ray_trace(vec(0, 0, -4), d, scene);
                }
            }
            /* The grey level can reach 255, which a signed char cannot hold
               (an out-of-range floating-to-integer conversion is undefined),
               so convert to unsigned char before writing the byte. */
            putchar((unsigned char)(0.5 + 255.0 * g / (ss*ss)));
        }
    }
    return 0;
}
--
Dr Jon Harrop, Flying Frog Consultancy Ltd.
http://www.ffconsultancy.com/?e
More information about the llvm-dev mailing list