[LLVMdev] Vectorized LLVM IR
Stéphane Letz
letz at grame.fr
Sat May 29 01:31:17 PDT 2010
>
> Oh, and you might also want to check that you're actually getting SSE
> code; if you accidentally disable SSE somehow, you'll end up with x87
> code, which will completely expand the vectors into scalars.
>
> -Eli
Using <4 x float> types:
faust -llvm -vec -vs 4 -lv 1 plus.dsp -o plus_vec.ll
; llvm_compute: element-wise vector kernel over <4 x float> buffers.
;
; For every index i in [0, %count) it computes
;     outputs[0][i] = (inputs[0][i] + inputs[1][i]) * (inputs[2][i] + inputs[3][i])
; where each element is one <4 x float>, so %count is a number of 4-float
; vectors, not a number of scalar samples.
;
; Parameters:
;   %obj     - pointer to %struct.llvm_dsp; not referenced anywhere in this body.
;   %count   - i32 iteration count; a value <= 0 (signed compare) skips the loop.
;   %inputs  - array of <4 x float>* buffer pointers; entries 0..3 are read.
;   %outputs - array of <4 x float>* buffer pointers; entry 0 is written.
;
; All vector loads and the store are tagged `align 16`.
;
; NOTE(review): the function is declared `readnone`, yet the body loads from
; and stores to memory through its pointer arguments. Confirm against the LLVM
; Language Reference that this attribute is really intended here.
define void @llvm_compute(%struct.llvm_dsp* noalias %obj, i32 %count, <4 x float>** noalias %inputs, <4 x float>** noalias %outputs) nounwind readnone ssp {
entry:
; Fetch the four input buffer pointers (inputs[0..3]) and the single output
; buffer pointer (outputs[0]) once, before the loop.
%input_array_ptr0 = getelementptr inbounds <4 x float>** %inputs, i64 0
%input0 = load <4 x float>** %input_array_ptr0
%input_array_ptr1 = getelementptr inbounds <4 x float>** %inputs, i64 1
%input1 = load <4 x float>** %input_array_ptr1
%input_array_ptr2 = getelementptr inbounds <4 x float>** %inputs, i64 2
%input2 = load <4 x float>** %input_array_ptr2
%input_array_ptr3 = getelementptr inbounds <4 x float>** %inputs, i64 3
%input3 = load <4 x float>** %input_array_ptr3
%output_array_ptr0 = getelementptr inbounds <4 x float>** %outputs, i64 0
%output0 = load <4 x float>** %output_array_ptr0
; Guard: only enter the loop when count > 0 (signed).
%out = icmp sgt i32 %count, 0
br i1 %out, label %convert, label %return
convert:
; Widen the (known positive) count to i64 so it can be compared with the
; 64-bit induction variable.
%count_64 = zext i32 %count to i64
br label %loop0
loop0:
; %indvar counts <4 x float> elements: 0, 1, ..., count - 1.
%indvar = phi i64 [ 0, %convert ], [ %indvar.next, %loop0 ]
%output_ptr0 = getelementptr <4 x float>* %output0, i64 %indvar
; %fVector2 = inputs[0][i] + inputs[1][i]
%input_ptr1 = getelementptr <4 x float>* %input1, i64 %indvar
%fVector0 = load <4 x float>* %input_ptr1, align 16;
%input_ptr0 = getelementptr <4 x float>* %input0, i64 %indvar
%fVector1 = load <4 x float>* %input_ptr0, align 16;
%fVector2 = fadd <4 x float> %fVector1, %fVector0;
; %fVector5 = inputs[2][i] + inputs[3][i]
%input_ptr3 = getelementptr <4 x float>* %input3, i64 %indvar
%fVector3 = load <4 x float>* %input_ptr3, align 16;
%input_ptr2 = getelementptr <4 x float>* %input2, i64 %indvar
%fVector4 = load <4 x float>* %input_ptr2, align 16;
%fVector5 = fadd <4 x float> %fVector4, %fVector3;
; outputs[0][i] = %fVector5 * %fVector2
%fVector6 = fmul <4 x float> %fVector5, %fVector2;
store <4 x float> %fVector6, <4 x float>* %output_ptr0, align 16
; Advance and leave the loop once count elements have been processed.
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, %count_64
br i1 %exitcond, label %return, label %loop0
return:
ret void
}
Then llc -O3 plus_vec.ll gives:
## ---------------------------------------------------------------------
## llc output for @llvm_compute (x86-64, AT&T syntax: op src, dst).
##
## Register roles, as used by the code below and matching the order of
## the IR function's arguments:
##   %esi = count (number of 16-byte <4 x float> elements)
##   %rdx = inputs  (array of buffer pointers)
##   %rcx = outputs (array of buffer pointers)
## The first argument (%obj) is never read.
##
## After the setup block:
##   %rax = outputs[0]   %rdx = inputs[0]   %r8  = inputs[1]
##   %rdi = inputs[2]    %rcx = inputs[3]
##   %r9  = byte offset into every buffer   %rsi = iterations remaining
##
## No stack is used; only %rax, %rcx, %rdx, %rsi, %rdi, %r8, %r9,
## %xmm0, %xmm1 and the flags are written.
## ---------------------------------------------------------------------
_llvm_compute: ## @llvm_compute
## BB#0: ## %entry
testl %esi, %esi          ## count <= 0 (signed)? then there is nothing to do
jle LBB8_3
## BB#1: ## %convert
movq (%rcx), %rax         ## rax = outputs[0]
movq 24(%rdx), %rcx       ## rcx = inputs[3] (reuses rcx; outputs no longer needed)
movq 16(%rdx), %rdi       ## rdi = inputs[2]
movq 8(%rdx), %r8         ## r8  = inputs[1]
movq (%rdx), %rdx         ## rdx = inputs[0] (loaded last: overwrites the array base)
xorl %r9d, %r9d           ## r9 = 0, byte offset of the current element
movl %esi, %esi           ## 32-bit self-move zero-extends count into rsi
## Loop-head alignment, padded with 0x90 (NOP) bytes.
## NOTE(review): presumably 2^4 = 16-byte alignment on this assembler - confirm.
.align 4, 0x90
LBB8_2: ## %loop0
## Loop Depth 1
## Loop Header
## Inner Loop
## Invariant: r9 = 16 * (elements already processed), rsi = elements left (> 0).
## movaps is the aligned packed move; the IR tags these accesses `align 16`.
movaps (%rdx,%r9), %xmm0  ## xmm0 = inputs[0][i]
movaps (%rdi,%r9), %xmm1  ## xmm1 = inputs[2][i]
addps (%r8,%r9), %xmm0    ## xmm0 += inputs[1][i]
addps (%rcx,%r9), %xmm1   ## xmm1 += inputs[3][i]
mulps %xmm0, %xmm1        ## xmm1 = (in2 + in3) * (in0 + in1)
movaps %xmm1, (%rax,%r9)  ## outputs[0][i] = xmm1
addq $16, %r9             ## step to the next 16-byte vector
decq %rsi                 ## one fewer element to go
jne LBB8_2
LBB8_3: ## %return
ret
So the generated code seems correct: the loop uses packed SSE instructions (movaps/addps/mulps) rather than scalar x87 code.
Thanks
Stéphane Letz
More information about the llvm-dev
mailing list