[LLVMdev] How can I compile a c source file to use SSE2 Data Movement Instructions?

lixiong hz hzlixiong at gmail.com
Wed Jan 4 07:43:11 PST 2012


I wrote a small function and tested it with clang and gcc:

file test.c:
/* Test data: two 100-element double arrays and a scalar multiplier. */
double X[100];  double Y[100];  double DA = 0.3;
/*
 * Updates Y in place: Y[i] = Y[i] - DA * X[i] for every i in [0, 100).
 * Takes no arguments and always returns 0.
 */
int f()
{
  int i;
  /* Each iteration reads and writes only element i of Y and reads element i of X. */
  for (i = 0; i < 100; i++)
   Y[i] = Y[i] - DA * X[i];
  return 0;
}
clang -S -O3 -o test.s test.c -march=native -ccc-echo
result:
"D:/work/trunk/bin/Release/clang.exe" -cc1 -triple i686-pc-win32 -S
-disable-free -disable-llvm-verifier -main-file-name test.c
-mrelocation-model static -mdisable-fp-elim -masm-verbose
-mconstructor-aliases -target-cpu corei7 -momit-leaf-frame-pointer
-coverage-file test.s
-resource-dir "D:/work/trunk/bin/Release\\..\\lib\\clang\\3.1"
-fmodule-cache-path "C:\\DOCUME~1\\ADMINI~1\\LOCALS~1\\Temp\\clang-module-cache"
-internal-isystem D:/work/trunk/bin/Release/../lib/clang/3.1/include
-internal-isystem "C:\\Program Files\\Microsoft Visual Studio 9.0\\VC\\include"
-internal-isystem "C:\\Program Files\\Microsoft SDKs\\Windows\\v6.0A\\\\include"
-O3 -ferror-limit 19 -fmessage-length 80 -mstackrealign -fms-extensions
-fms-compatibility -fmsc-version=1300 -fdelayed-template-parsing
-fgnu-runtime -fobjc-runtime-has-arc -fobjc-runtime-has-weak
-fobjc-fragile-abi -fdiagnostics-show-option -fcolor-diagnostics
-o test.s -x c test.c

 # clang -O3 output for f(), AT&T syntax (operand order: source, destination).
 # The loop below handles ONE double (8 bytes) per iteration using the scalar
 # instructions movsd/mulsd/subsd, i.e. the loop was not vectorized.
 # Register roles: %eax = byte offset running from -800 up to 0,
 #                 %xmm0 = DA, %xmm1 = DA * X[i], %xmm2 = Y[i] - DA * X[i].
 .def  _f;
 .scl 2;
 .type 32;
 .endef
 .text
 .globl _f
 .align 16, 0x90                        # pad to a 16-byte boundary with 0x90 fill bytes
_f:                                     # @f
# BB#0:
 movl $-800, %eax             # imm = 0xFFFFFFFFFFFFFCE0
 movsd _DA, %xmm0                       # xmm0 = DA, loaded once outside the loop
 .align 16, 0x90
LBB0_1:                                 # =>This Inner Loop Header: Depth=1
 movsd _X+800(%eax), %xmm1              # xmm1 = X[i]; the +800 bias cancels the negative offset
 mulsd %xmm0, %xmm1                     # xmm1 = DA * X[i]
 movsd _Y+800(%eax), %xmm2              # xmm2 = Y[i]
 subsd %xmm1, %xmm2                     # xmm2 = Y[i] - DA * X[i]
 movsd %xmm2, _Y+800(%eax)              # store the result back to Y[i]
 addl $8, %eax                          # advance by one double; sets ZF when the offset reaches 0
 jne LBB0_1                             # loop until the offset is 0 (100 iterations), no separate cmp needed
# BB#2:
 xorl %eax, %eax                        # return 0
 ret
 .data
 .globl _DA                     # @DA
 .align 8
_DA:
 .quad 4599075939470750515     # double 3.000000e-01
 .comm _Y,800,3                # @Y
 .comm _X,800,3                # @X


gcc -S -O3 -o test2.s test.c -march=native
result:
 # gcc -O3 -march=native output for f(), AT&T syntax (operand order: source, destination).
 # The loop below handles TWO doubles (16 bytes) per iteration using the packed
 # instructions movapd/mulpd/subpd, i.e. the loop was vectorized.
 # Register roles: %eax = byte offset running from 0 up to 800,
 #                 %xmm2 = {DA, DA}, %xmm1 = DA * X[i..i+1], %xmm0 = Y[i..i+1] - DA * X[i..i+1].
 .file "test.c"
 .text
 .p2align 4,,15                         # align function entry to 16 bytes (skip at most 15)
.globl _f
 .def _f; .scl 2; .type 32; .endef
_f:
 pushl %ebp                             # frame-pointer prologue (interleaved with the DA load below)
 movddup _DA, %xmm2                     # xmm2 = {DA, DA}: load DA and duplicate it into both lanes
                                        # NOTE: movddup is an SSE3 instruction, not SSE2
 movl %esp, %ebp
 xorl %eax, %eax                        # byte offset = 0
 .p2align 4,,10                         # align the loop head to 16 bytes only if that costs at most 10 bytes
L2:
 movapd _Y(%eax), %xmm0                 # xmm0 = Y[i], Y[i+1]; movapd requires a 16-byte-aligned address
 movapd _X(%eax), %xmm1                 # xmm1 = X[i], X[i+1]
 mulpd %xmm2, %xmm1                     # xmm1 = DA * X[i], DA * X[i+1]
 subpd %xmm1, %xmm0                     # xmm0 = Y - DA * X for both lanes
 movapd %xmm0, _Y(%eax)                 # store both results back to Y
 addl $16, %eax                         # advance by two doubles
 cmpl $800, %eax                        # 800 bytes = 100 doubles
 jne L2                                 # 50 iterations in total
 xorw %ax, %ax                          # return 0: eax is 800 here, which fits in 16 bits,
                                        # so clearing only %ax leaves the whole of eax zero
 leave                                  # undo the frame-pointer prologue
 ret
.globl _DA
 .data
 .align 16
_DA:
 # The two 32-bit words below are the low and high halves (0x33333333, 0x3FD33333)
 # of the little-endian IEEE-754 double 0.3.
 .long 858993459
 .long 1070805811
 # NOTE(review): the third .comm operand is an alignment request; presumably a
 # power-of-two exponent (2^5 = 32 bytes) on this target -- confirm in the assembler docs.
 .comm _X, 800, 5
 .comm _Y, 800, 5

It seems gcc emits more efficient instructions. Are there any clang
command-line arguments to get a similar result?
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20120104/96cd942c/attachment.html>


More information about the llvm-dev mailing list