[LLVMdev] How can I compile a c source file to use SSE2 Data Movement Instructions?
lixiong hz
hzlixiong at gmail.com
Wed Jan 4 07:43:11 PST 2012
I wrote a small function and tested it with both clang and gcc.
File test.c:
double X[100]; double Y[100]; double DA = 0.3;
int f()
{
int i;
for (i = 0; i < 100; i++)
Y[i] = Y[i] - DA * X[i];
return 0;
}
clang -S -O3 -o test.s test.c -march=native -ccc-echo
result:
"D:/work/trunk/bin/Release/clang.exe" -cc1 -triple i686-pc-win32 -S
-disable-free -disable-llvm-verifier -main-file-name test.c
-mrelocation-model static -mdisable-fp-elim -masm-verbose
-mconstructor-aliases -target-cpu corei7 -momit-leaf-frame-pointer
-coverage-file test.s
-resource-dir "D:/work/trunk/bin/Release\\..\\lib\\clang\\3.1"
-fmodule-cache-path "C:\\DOCUME~1\\ADMINI~1\\LOCALS~1\\Temp\\clang-module-cache"
-internal-isystem D:/work/trunk/bin/Release/../lib/clang/3.1/include
-internal-isystem "C:\\Program Files\\Microsoft Visual Studio 9.0\\VC\\include"
-internal-isystem "C:\\Program Files\\Microsoft SDKs\\Windows\\v6.0A\\\\include"
-O3 -ferror-limit 19 -fmessage-length 80 -mstackrealign -fms-extensions
-fms-compatibility -fmsc-version=1300 -fdelayed-template-parsing
-fgnu-runtime -fobjc-runtime-has-arc -fobjc-runtime-has-weak
-fobjc-fragile-abi -fdiagnostics-show-option -fcolor-diagnostics
-o test.s -x c test.c
.def _f;
.scl 2;
.type 32;
.endef
.text
.globl _f
.align 16, 0x90
_f: # @f
# BB#0:
movl $-800, %eax # imm = 0xFFFFFFFFFFFFFCE0
movsd _DA, %xmm0
.align 16, 0x90
LBB0_1: # =>This Inner Loop Header: Depth=1
movsd _X+800(%eax), %xmm1
mulsd %xmm0, %xmm1
movsd _Y+800(%eax), %xmm2
subsd %xmm1, %xmm2
movsd %xmm2, _Y+800(%eax)
addl $8, %eax
jne LBB0_1
# BB#2:
xorl %eax, %eax
ret
.data
.globl _DA # @DA
.align 8
_DA:
.quad 4599075939470750515 # double 3.000000e-01
.comm _Y,800,3 # @Y
.comm _X,800,3 # @X
gcc -S -O3 -o test2.s test.c -march=native
result:
.file "test.c"
.text
.p2align 4,,15
.globl _f
.def _f; .scl 2; .type 32; .endef
_f:
pushl %ebp
movddup _DA, %xmm2
movl %esp, %ebp
xorl %eax, %eax
.p2align 4,,10
L2:
movapd _Y(%eax), %xmm0
movapd _X(%eax), %xmm1
mulpd %xmm2, %xmm1
subpd %xmm1, %xmm0
movapd %xmm0, _Y(%eax)
addl $16, %eax
cmpl $800, %eax
jne L2
xorw %ax, %ax
leave
ret
.globl _DA
.data
.align 16
_DA:
.long 858993459
.long 1070805811
.comm _X, 800, 5
.comm _Y, 800, 5
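It seems gcc emits more efficient instructions: it vectorizes the loop with
packed SSE2 operations (movapd/mulpd/subpd, two doubles per iteration), while
clang uses the scalar forms (movsd/mulsd/subsd, one double per iteration).
Are there any clang command-line arguments that produce a similar result?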
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20120104/96cd942c/attachment.html>
More information about the llvm-dev
mailing list