[LLVMdev] use AVX automatically if present
Henning Thielemann
llvm at henning-thielemann.de
Thu May 24 13:19:41 PDT 2012
I wonder why AVX is not used automatically if available at the host
machine. In contrast to that, SSE41 instructions (like pmulld) are
automatically used if the host machine supports SSE41.
E.g.
$ cat avx.ll
define void @_fun1(<8 x float>*, <8 x float>*) {
_L1:
%x = load <8 x float>* %0
%y = load <8 x float>* %1
%z = fadd <8 x float> %x, %y
store <8 x float> %z, <8 x float>* %0
ret void
}
$ llc -o - avx.ll
.file "avx.ll"
.text
.globl _fun1
.align 16, 0x90
.type _fun1, at function
_fun1: # @_fun1
.cfi_startproc
# BB#0: # %_L1
movaps (%rdi), %xmm0
movaps 16(%rdi), %xmm1
addps (%rsi), %xmm0
addps 16(%rsi), %xmm1
movaps %xmm1, 16(%rdi)
movaps %xmm0, (%rdi)
ret
.Ltmp0:
.size _fun1, .Ltmp0-_fun1
.cfi_endproc
.section ".note.GNU-stack","", at progbits
$ llc -o - -mattr avx avx.ll
.file "avx.ll"
.text
.globl _fun1
.align 16, 0x90
.type _fun1, at function
_fun1: # @_fun1
.cfi_startproc
# BB#0: # %_L1
pushq %rbp
.Ltmp2:
.cfi_def_cfa_offset 16
.Ltmp3:
.cfi_offset %rbp, -16
movq %rsp, %rbp
.Ltmp4:
.cfi_def_cfa_register %rbp
vmovaps (%rdi), %ymm0
vaddps (%rsi), %ymm0, %ymm0
vmovaps %ymm0, (%rdi)
popq %rbp
vzeroupper
ret
.Ltmp5:
.size _fun1, .Ltmp5-_fun1
.cfi_endproc
.section ".note.GNU-stack","", at progbits
I guess your answer is that I did not specify a target triple. However why
is SSE41 automatically detected and AVX is not?
More information about the llvm-dev
mailing list