[LLVMdev] Why the same code is much slower in JIT compared to separate executable?

Thu Apr 29 01:44:52 PDT 2010

I ran the same simple Fibonacci-computing code under the JIT and as a native 
executable. With argument 45, the JIT version runs for 11.3 sec and the 
executable runs for 7.5 sec.
Why is there such a difference?

Yuri

-------- fib.ll --------
; ModuleID = 'all.bc'

; printf format string used by @main: "fib(%i)=%i\n" plus the NUL terminator (12 bytes).
@.str = private constant [12 x i8] c"fib(%i)=%i\0A\00", align 1 ; <[12 x i8]*> [#uses=1]

define i32 @fib(i32 %AnArg) {
entry:
 ; fib(n) is 1 for every n <= 2 (signed compare); larger n recurses.
 %needs_recursion = icmp sgt i32 %AnArg, 2
 br i1 %needs_recursion, label %recurse, label %base

base:                                             ; preds = %entry
 ret i32 1

recurse:                                          ; preds = %entry
 ; fib(n) = fib(n-1) + fib(n-2); the n-1 call is issued first.
 %n_minus_1 = add i32 %AnArg, -1
 %fib_prev = tail call i32 @fib(i32 %n_minus_1)
 %n_minus_2 = add i32 %AnArg, -2
 %fib_prev2 = tail call i32 @fib(i32 %n_minus_2)
 %sum = add i32 %fib_prev2, %fib_prev
 ret i32 %sum
}

; Program entry point: parses argv[1] with atoi, computes @fib of it, and
; prints "fib(<n>)=<result>" via printf.
; Returns 0 on success, or 1 when no command-line argument was supplied.
define i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
entry:
 ; argv[1] may only be read when at least one argument was passed.
 %has_arg = icmp sgt i32 %argc, 1
 br i1 %has_arg, label %run, label %missing_arg

missing_arg:                                      ; preds = %entry
 ret i32 1

run:                                              ; preds = %entry
 %argp = getelementptr inbounds i8** %argv, i32 1
 %arg = load i8** %argp, align 4
 %n = tail call i32 @atoi(i8* %arg) nounwind
 %result = tail call i32 @fib(i32 %n) nounwind
 %printed = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), i32 %n, i32 %result) nounwind
 ; Return a defined exit status; the original returned undef.
 ret i32 0
}

; External atoi (presumably resolved from the C library at link/JIT time);
; @main calls it on argv[1] to obtain the argument for @fib.
declare i32 @atoi(i8* nocapture) nounwind readonly

; External variadic printf (presumably from the C library); @main calls it
; with @.str to print the input and the computed result.
declare i32 @printf(i8* nocapture, ...) nounwind

-------- run-jit shell script --------
# Assemble the IR to bitcode, then time the run under the lli JIT with argument 45.
llvm-as fib.ll -o fib.bc && time lli -O3 fib.bc 45

-------- run-exe shell script --------
# Build a native executable from fib.ll (IR -> bitcode -> assembly -> object
# -> linked binary) and time it with the same argument as the JIT run.
llvm-as fib.ll && \
llc -O3 fib.bc -o fib.s && \
as fib.s -o fib.o && \
gcc -o fib fib.o && \
time ./fib 45