[LLVMdev] How can I compile a c source file to use SSE2 Data Movement Instructions?

Duncan Sands baldrick at free.fr
Wed Jan 4 08:09:52 PST 2012


Hi lixiong hz, clang doesn't have an autovectorizer yet, so with clang for the
moment you have to use vectors explicitly in your C code.  However, dragonegg can
produce vector code using the -fplugin-arg-dragonegg-enable-gcc-optzns option (it
is actually the gcc optimizers that do the vectorizing).

Ciao, Duncan.

On 04/01/12 16:43, lixiong hz wrote:
> I wrote a small function and tested it under clang and gcc,
> file test.c:
> double X[100];  double Y[100];  double DA = 0.3;
> int f()
> {
>    int i;
>    for (i = 0; i < 100; i++)
>     Y[i] = Y[i] - DA * X[i];
>    return 0;
> }
> clang -S -O3 -o test.s test.c -march=native -ccc-echo
> result:
> "D:/work/trunk/bin/Release/clang.exe" -cc1 -triple i686-pc-win32 -S -disable-free
> -disable-llvm-verifier -main-file-name test.c -mrelocation-model static
> -mdisable-fp-elim -masm-verbose -mconstructor-aliases -target-cpu corei7
> -momit-leaf-frame-pointer -coverage-file test.s
> -resource-dir "D:/work/trunk/bin/Release\\..\\lib\\clang\\3.1"
> -fmodule-cache-path "C:\\DOCUME~1\\ADMINI~1\\LOCALS~1\\Temp\\clang-module-cache"
> -internal-isystem D:/work/trunk/bin/Release/../lib/clang/3.1/include
> -internal-isystem "C:\\Program Files\\Microsoft Visual Studio 9.0\\VC\\include"
> -internal-isystem "C:\\Program Files\\Microsoft SDKs\\Windows\\v6.0A\\\\include"
> -O3 -ferror-limit 19 -fmessage-length 80 -mstackrealign -fms-extensions
> -fms-compatibility -fmsc-version=1300 -fdelayed-template-parsing -fgnu-runtime
> -fobjc-runtime-has-arc -fobjc-runtime-has-weak -fobjc-fragile-abi
> -fdiagnostics-show-option -fcolor-diagnostics -o test.s -x c test.c
>   .def  _f;
>   .scl 2;
>   .type 32;
>   .endef
>   .text
>   .globl _f
>   .align 16, 0x90
> _f:                                     # @f
> # BB#0:
>   movl $-800, %eax             # imm = 0xFFFFFFFFFFFFFCE0
>   movsd _DA, %xmm0
>   .align 16, 0x90
> LBB0_1:                                 # =>This Inner Loop Header: Depth=1
>   movsd _X+800(%eax), %xmm1
>   mulsd %xmm0, %xmm1
>   movsd _Y+800(%eax), %xmm2
>   subsd %xmm1, %xmm2
>   movsd %xmm2, _Y+800(%eax)
>   addl $8, %eax
>   jne LBB0_1
> # BB#2:
>   xorl %eax, %eax
>   ret
>   .data
>   .globl _DA                     # @DA
>   .align 8
> _DA:
>   .quad 4599075939470750515     # double 3.000000e-01
>   .comm _Y,800,3                # @Y
>   .comm _X,800,3                # @X
> gcc -S -O3 -o test2.s test.c -march=native
> result:
>   .file "test.c"
>   .text
>   .p2align 4,,15
> .globl _f
>   .def _f; .scl 2; .type 32; .endef
> _f:
>   pushl %ebp
>   movddup _DA, %xmm2
>   movl %esp, %ebp
>   xorl %eax, %eax
>   .p2align 4,,10
> L2:
>   movapd _Y(%eax), %xmm0
>   movapd _X(%eax), %xmm1
>   mulpd %xmm2, %xmm1
>   subpd %xmm1, %xmm0
>   movapd %xmm0, _Y(%eax)
>   addl $16, %eax
>   cmpl $800, %eax
>   jne L2
>   xorw %ax, %ax
>   leave
>   ret
> .globl _DA
>   .data
>   .align 16
> _DA:
>   .long 858993459
>   .long 1070805811
>   .comm _X, 800, 5
>   .comm _Y, 800, 5
> It seems gcc emits more efficient instructions. Are there any clang command-line
> arguments to get a similar result?
>
>
> _______________________________________________
> LLVM Developers mailing list
> LLVMdev at cs.uiuc.edu         http://llvm.cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev




More information about the llvm-dev mailing list