[llvm-commits] [llvm] r48090 - in /llvm/trunk: lib/Target/X86/README-SSE.txt lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/vec_set-B.ll
Chris Lattner
sabre at nondot.org
Sat Mar 8 21:42:06 PST 2008
Author: lattner
Date: Sat Mar 8 23:42:06 2008
New Revision: 48090
URL: http://llvm.org/viewvc/llvm-project?rev=48090&view=rev
Log:
Finish implementing a README entry: when inserting an i64 variable
into a vector of zeros or undef, and the top part of that variable is
obviously zero, we can just use movd + shuffle. This allows us to
compile vec_set-B.ll into:
_test3:
movl $1234567, %eax
andl 4(%esp), %eax
movd %eax, %xmm0
ret
instead of:
_test3:
subl $28, %esp
movl $1234567, %eax
andl 32(%esp), %eax
movl %eax, (%esp)
movl $0, 4(%esp)
movq (%esp), %xmm0
addl $28, %esp
ret
Added:
llvm/trunk/test/CodeGen/X86/vec_set-B.ll
Modified:
llvm/trunk/lib/Target/X86/README-SSE.txt
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
Modified: llvm/trunk/lib/Target/X86/README-SSE.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/README-SSE.txt?rev=48090&r1=48089&r2=48090&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/README-SSE.txt (original)
+++ llvm/trunk/lib/Target/X86/README-SSE.txt Sat Mar 8 23:42:06 2008
@@ -781,41 +781,3 @@
just a matter of matching (scalar_to_vector (load x)) to movd.
//===---------------------------------------------------------------------===//
-
-These two functions should compile to identical code on x86-32:
-
-define <2 x i64> @test2(i64 %arg) {
-entry:
- %A = and i64 %arg, 1234567
- %B = insertelement <2 x i64> undef, i64 %A, i32 0
- ret <2 x i64> %B
-}
-
-define <2 x i64> @test2(i64 %arg) {
-entry:
- %A = and i64 %arg, 1234567
- %B = insertelement <2 x i64> zeroinitializer, i64 %A, i32 0
- ret <2 x i64> %B
-}
-
-The latter compiles to:
-
-_test2:
- movl $1234567, %eax
- andl 4(%esp), %eax
- movd %eax, %xmm0
- ret
-
-the former compiles to:
-
-_test2:
- subl $28, %esp
- movl $1234567, %eax
- andl 32(%esp), %eax
- movl %eax, (%esp)
- movl $0, 4(%esp)
- movaps (%esp), %xmm0
- addl $28, %esp
- ret
-
-//===---------------------------------------------------------------------===//
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=48090&r1=48089&r2=48090&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Mar 8 23:42:06 2008
@@ -3063,11 +3063,7 @@
return DAG.getNode(ISD::UNDEF, VT);
}
- // Splat is obviously ok. Let legalizer expand it to a shuffle.
- if (Values.size() == 1)
- return SDOperand();
-
- // Special case for single non-zero element.
+ // Special case for a single non-zero, non-undef element.
if (NumNonZero == 1 && NumElems <= 4) {
unsigned Idx = CountTrailingZeros_32(NonZeros);
SDOperand Item = Op.getOperand(Idx);
@@ -3141,6 +3137,10 @@
}
}
+ // Splat is obviously ok. Let legalizer expand it to a shuffle.
+ if (Values.size() == 1)
+ return SDOperand();
+
// A vector full of immediates; various special cases are already
// handled, so this is best done with a single constant-pool load.
if (IsAllConstants)
Added: llvm/trunk/test/CodeGen/X86/vec_set-B.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_set-B.ll?rev=48090&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_set-B.ll (added)
+++ llvm/trunk/test/CodeGen/X86/vec_set-B.ll Sat Mar 8 23:42:06 2008
@@ -0,0 +1,24 @@
+; RUN: llvm-as < %s | llc -march=x86 | not grep movaps
+; RUN: llvm-as < %s | llc -march=x86 | grep esp | count 2
+
+; These should both generate something like this:
+;_test3:
+; movl $1234567, %eax
+; andl 4(%esp), %eax
+; movd %eax, %xmm0
+; ret
+
+define <2 x i64> @test3(i64 %arg) {
+entry:
+ %A = and i64 %arg, 1234567
+ %B = insertelement <2 x i64> zeroinitializer, i64 %A, i32 0
+ ret <2 x i64> %B
+}
+
+define <2 x i64> @test2(i64 %arg) {
+entry:
+ %A = and i64 %arg, 1234567
+ %B = insertelement <2 x i64> undef, i64 %A, i32 0
+ ret <2 x i64> %B
+}
+
More information about the llvm-commits
mailing list