[llvm-commits] [llvm] r113116 - /llvm/trunk/lib/Target/X86/README-SSE.txt
Chris Lattner
sabre at nondot.org
Sun Sep 5 13:22:09 PDT 2010
Author: lattner
Date: Sun Sep 5 15:22:09 2010
New Revision: 113116
URL: http://llvm.org/viewvc/llvm-project?rev=113116&view=rev
Log:
update this.
Modified:
llvm/trunk/lib/Target/X86/README-SSE.txt
Modified: llvm/trunk/lib/Target/X86/README-SSE.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/README-SSE.txt?rev=113116&r1=113115&r2=113116&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/README-SSE.txt (original)
+++ llvm/trunk/lib/Target/X86/README-SSE.txt Sun Sep 5 15:22:09 2010
@@ -20,7 +20,28 @@
//===---------------------------------------------------------------------===//
SSE has instructions for doing operations on complex numbers; we should pattern
-match them. Compiling this:
+match them. For example, this should turn into a horizontal add:
+
+typedef float __attribute__((vector_size(16))) v4f32;
+float f32(v4f32 A) {
+ return A[0]+A[1]+A[2]+A[3];
+}
+
+Instead we get this:
+
+_f32: ## @f32
+ pshufd $1, %xmm0, %xmm1 ## xmm1 = xmm0[1,0,0,0]
+ addss %xmm0, %xmm1
+ pshufd $3, %xmm0, %xmm2 ## xmm2 = xmm0[3,0,0,0]
+ movhlps %xmm0, %xmm0 ## xmm0 = xmm0[1,1]
+ movaps %xmm0, %xmm3
+ addss %xmm1, %xmm3
+ movdqa %xmm2, %xmm0
+ addss %xmm3, %xmm0
+ ret
+
+Also, there are cases where some simple local SLP (superword-level parallelism)
+vectorization would improve codegen a bit. Compiling this:
_Complex float f32(_Complex float A, _Complex float B) {
return A+B;
@@ -28,19 +49,17 @@
into:
-_f32:
+_f32: ## @f32
movdqa %xmm0, %xmm2
addss %xmm1, %xmm2
- pshufd $16, %xmm2, %xmm2
- pshufd $1, %xmm1, %xmm1
- pshufd $1, %xmm0, %xmm0
- addss %xmm1, %xmm0
- pshufd $16, %xmm0, %xmm1
- movdqa %xmm2, %xmm0
- unpcklps %xmm1, %xmm0
+ pshufd $1, %xmm1, %xmm1 ## xmm1 = xmm1[1,0,0,0]
+ pshufd $1, %xmm0, %xmm3 ## xmm3 = xmm0[1,0,0,0]
+ addss %xmm1, %xmm3
+ movaps %xmm2, %xmm0
+ unpcklps %xmm3, %xmm0 ## xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
ret
-seems silly.
+seems silly when it could just be one addps.
//===---------------------------------------------------------------------===//
More information about the llvm-commits mailing list