[llvm-commits] [llvm] r112379 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/2009-02-26-MachineLICMBug.ll test/CodeGen/X86/sse41.ll test/CodeGen/X86/vec_insert-9.ll
Chris Lattner
sabre at nondot.org
Sat Aug 28 10:59:08 PDT 2010
Author: lattner
Date: Sat Aug 28 12:59:08 2010
New Revision: 112379
URL: http://llvm.org/viewvc/llvm-project?rev=112379&view=rev
Log:
fix the buildvector->insertp[sd] logic to not always create a redundant
insertp[sd] $0, which is a noop. Before:
_f32: ## @f32
pshufd $1, %xmm1, %xmm2
pshufd $1, %xmm0, %xmm3
addss %xmm2, %xmm3
addss %xmm1, %xmm0
## kill: XMM0<def> XMM0<kill> XMM0<def>
insertps $0, %xmm0, %xmm0
insertps $16, %xmm3, %xmm0
ret
after:
_f32: ## @f32
movdqa %xmm0, %xmm2
addss %xmm1, %xmm2
pshufd $1, %xmm1, %xmm1
pshufd $1, %xmm0, %xmm3
addss %xmm1, %xmm3
movdqa %xmm2, %xmm0
insertps $16, %xmm3, %xmm0
ret
The extra movs are due to a random (poor) scheduling decision.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
llvm/trunk/test/CodeGen/X86/sse41.ll
llvm/trunk/test/CodeGen/X86/vec_insert-9.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=112379&r1=112378&r2=112379&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Aug 28 12:59:08 2010
@@ -4278,14 +4278,20 @@
if (LD.getNode())
return LD;
- // For SSE 4.1, use inserts into undef.
+ // For SSE 4.1, use insertps to put the high elements into the low element.
if (getSubtarget()->hasSSE41()) {
- V[0] = DAG.getUNDEF(VT);
- for (unsigned i = 0; i < NumElems; ++i)
- if (Op.getOperand(i).getOpcode() != ISD::UNDEF)
- V[0] = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, V[0],
+ SDValue Result;
+ if (Op.getOperand(0).getOpcode() != ISD::UNDEF)
+ Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
+ else
+ Result = DAG.getUNDEF(VT);
+
+ for (unsigned i = 1; i < NumElems; ++i) {
+ if (Op.getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result,
Op.getOperand(i), DAG.getIntPtrConstant(i));
- return V[0];
+ }
+ return Result;
}
// Otherwise, expand into a number of unpckl*, start by extending each of
Modified: llvm/trunk/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll?rev=112379&r1=112378&r2=112379&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll (original)
+++ llvm/trunk/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll Sat Aug 28 12:59:08 2010
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {7 machine-licm}
+; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {6 machine-licm}
; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 | FileCheck %s
; rdar://6627786
; rdar://7792037
Modified: llvm/trunk/test/CodeGen/X86/sse41.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41.ll?rev=112379&r1=112378&r2=112379&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41.ll Sat Aug 28 12:59:08 2010
@@ -224,3 +224,28 @@
declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone
declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone
+; This used to compile to insertps $0 + insertps $16. insertps $0 is always
+; pointless.
+define <2 x float> @buildvector(<2 x float> %A, <2 x float> %B) nounwind {
+entry:
+ %tmp7 = extractelement <2 x float> %A, i32 0
+ %tmp5 = extractelement <2 x float> %A, i32 1
+ %tmp3 = extractelement <2 x float> %B, i32 0
+ %tmp1 = extractelement <2 x float> %B, i32 1
+ %add.r = fadd float %tmp7, %tmp3
+ %add.i = fadd float %tmp5, %tmp1
+ %tmp11 = insertelement <2 x float> undef, float %add.r, i32 0
+ %tmp9 = insertelement <2 x float> %tmp11, float %add.i, i32 1
+ ret <2 x float> %tmp9
+; X32: buildvector:
+; X32-NOT: insertps $0
+; X32: insertps $16
+; X32-NOT: insertps $0
+; X32: ret
+; X64: buildvector:
+; X64-NOT: insertps $0
+; X64: insertps $16
+; X64-NOT: insertps $0
+; X64: ret
+}
+
Modified: llvm/trunk/test/CodeGen/X86/vec_insert-9.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_insert-9.ll?rev=112379&r1=112378&r2=112379&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_insert-9.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_insert-9.ll Sat Aug 28 12:59:08 2010
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86 -mattr=+sse41 > %t
-; RUN: grep pinsrd %t | count 2
+; RUN: grep pinsrd %t | count 1
define <4 x i32> @var_insert2(<4 x i32> %x, i32 %val, i32 %idx) nounwind {
entry:
More information about the llvm-commits
mailing list