[LLVMdev] No SSE instructions

Duncan Sands baldrick at free.fr
Sun May 22 12:27:59 PDT 2011


Hi Serg,

> Next, I disassembled the executable file and did not find any SSE instructions.
> I know that LLVM support SSE.
> So my questions:
>    1. Does this occur only on my computer?
>    2. If it is not just a problem on my side, are there no SSE optimizations in LLVM?
>    3. Has anyone already worked on this problem?

The gcc-4.5 tree vectorizer vectorizes this (see the LLVM IR below), but LLVM does
not yet have an auto-vectorizer that can do this.

Ciao, Duncan.

IR produced by dragonegg using -O3 and -fplugin-arg-dragonegg-enable-gcc-optzns:

target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"

module asm "\09.ident\09\22GCC: (GNU) 4.5.4 20110506 (prerelease) LLVM: 131851M\22"

; Zero-initialized array of 10000 i32 values; written and then read back by @main.
@v1 = common global [10000 x i32] zeroinitializer, align 32
; NUL-terminated format string "%d " passed to @printf from @main.
@.cst = private constant [4 x i8] c"%d \00", align 8

; main: stores the values 0..9999 into @v1, four i32 lanes per store, then
; prints every element of @v1 with printf("%d ") and returns 0.
define i32 @main() nounwind {
entry:
   br label %"<bb 3>"

; Fill loop: 2500 iterations, each writing one <4 x i32> vector into @v1.
"<bb 3>":                                         ; preds = %"<bb 3>", %entry
   ; Iteration counter, starting at 0.
   %indvar2 = phi i64 [ %indvar.next3, %"<bb 3>" ], [ 0, %entry ]
   ; Vector induction variable: <0, 1, 2, 3> on entry, advanced by 4 per lane below.
   %vect_vec_iv_.8_10 = phi <4 x i32> [ %vect_vec_iv_.8_24, %"<bb 3>" ], [ <i32 
0, i32 1, i32 2, i32 3>, %entry ]
   ; Element index 4 * %indvar2, then the address of that element viewed as <4 x i32>*.
   %tmp6 = shl i64 %indvar2, 2
   %scevgep7 = getelementptr [10000 x i32]* @v1, i64 0, i64 %tmp6
   %scevgep78 = bitcast i32* %scevgep7 to <4 x i32>*
   ; Next value of the vector induction variable (each lane + 4).
   %vect_vec_iv_.8_24 = add nsw <4 x i32> %vect_vec_iv_.8_10, <i32 4, i32 4, i32 
4, i32 4>
   ; Store the current four lane values to v1[4*indvar2 .. 4*indvar2+3].
   store <4 x i32> %vect_vec_iv_.8_10, <4 x i32>* %scevgep78, align 16
   ; Leave the loop once 2500 vectors (2500 * 4 = 10000 elements) have been stored.
   %indvar.next3 = add i64 %indvar2, 1
   %exitcond4 = icmp eq i64 %indvar.next3, 2500
   br i1 %exitcond4, label %"<bb 5>", label %"<bb 3>"

; Print loop: scalar, one element of @v1 per iteration, 10000 iterations.
"<bb 5>":                                         ; preds = %"<bb 3>", %"<bb 5>"
   ; Element index, starting at 0.
   %indvar = phi i64 [ %indvar.next, %"<bb 5>" ], [ 0, %"<bb 3>" ]
   ; Load v1[indvar] and pass it to printf with the "%d " format string @.cst.
   %scevgep = getelementptr [10000 x i32]* @v1, i64 0, i64 %indvar
   %D.3943_6 = load i32* %scevgep, align 4
   %0 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* 
@.cst, i64 0, i64 0), i32 %D.3943_6) nounwind
   ; Leave the loop after the element at index 9999 has been printed.
   %indvar.next = add i64 %indvar, 1
   %exitcond = icmp eq i64 %indvar.next, 10000
   br i1 %exitcond, label %"<bb 6>", label %"<bb 5>"

"<bb 6>":                                         ; preds = %"<bb 5>"
   ret i32 0
}

; External variadic printf, defined outside this module; called from @main's print loop.
declare i32 @printf(i8* nocapture, ...) nounwind



More information about the llvm-dev mailing list