[llvm-commits] [llvm] r109549 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h test/CodeGen/X86/vec_shift4.ll
Nate Begeman
natebegeman at mac.com
Tue Jul 27 15:37:06 PDT 2010
Author: sampo
Date: Tue Jul 27 17:37:06 2010
New Revision: 109549
URL: http://llvm.org/viewvc/llvm-project?rev=109549&view=rev
Log:
~40% faster vector shl <4 x i32> on SSE 4.1 Larger improvements for smaller types coming in future patches.
For:
define <2 x i64> @shl(<4 x i32> %r, <4 x i32> %a) nounwind readnone ssp {
entry:
%shl = shl <4 x i32> %r, %a ; <<4 x i32>> [#uses=1]
%tmp2 = bitcast <4 x i32> %shl to <2 x i64> ; <<2 x i64>> [#uses=1]
ret <2 x i64> %tmp2
}
We get:
_shl: ## @shl
pslld $23, %xmm1
paddd LCPI0_0, %xmm1
cvttps2dq %xmm1, %xmm1
pmulld %xmm1, %xmm0
ret
Instead of:
_shl: ## @shl
pshufd $3, %xmm0, %xmm2
movd %xmm2, %eax
pshufd $3, %xmm1, %xmm2
movd %xmm2, %ecx
shll %cl, %eax
movd %eax, %xmm2
pshufd $1, %xmm0, %xmm3
movd %xmm3, %eax
pshufd $1, %xmm1, %xmm3
movd %xmm3, %ecx
shll %cl, %eax
movd %eax, %xmm3
punpckldq %xmm2, %xmm3
movd %xmm0, %eax
movd %xmm1, %ecx
shll %cl, %eax
movd %eax, %xmm2
movhlps %xmm0, %xmm0
movd %xmm0, %eax
movhlps %xmm1, %xmm1
movd %xmm1, %ecx
shll %cl, %eax
movd %eax, %xmm0
punpckldq %xmm0, %xmm2
movdqa %xmm2, %xmm0
punpckldq %xmm3, %xmm0
ret
Added:
llvm/trunk/test/CodeGen/X86/vec_shift4.ll
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.h
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=109549&r1=109548&r2=109549&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Jul 27 17:37:06 2010
@@ -838,6 +838,9 @@
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
+ // Can turn SHL into an integer multiply.
+ setOperationAction(ISD::SHL, MVT::v4i32, Custom);
+
// i8 and i16 vectors are custom , because the source register and source
// source memory operand types are not the same width. f32 vectors are
// custom since the immediate controlling the insert encodes additional
@@ -7498,6 +7501,35 @@
return Res;
}
+SDValue X86TargetLowering::LowerSHL(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue R = Op.getOperand(0);
+
+ assert(Subtarget->hasSSE41() && "Cannot lower SHL without SSE4.1 or later");
+ assert(VT == MVT::v4i32 && "Only know how to lower v4i32");
+
+ Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
+ Op.getOperand(1), DAG.getConstant(23, MVT::i32));
+
+ std::vector<Constant*> CV;
+ LLVMContext *Context = DAG.getContext();
+ CV.push_back(ConstantInt::get(*Context, APInt(32, 0x3f800000U)));
+ CV.push_back(ConstantInt::get(*Context, APInt(32, 0x3f800000U)));
+ CV.push_back(ConstantInt::get(*Context, APInt(32, 0x3f800000U)));
+ CV.push_back(ConstantInt::get(*Context, APInt(32, 0x3f800000U)));
+ Constant *C = ConstantVector::get(CV);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue Addend = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 16);
+
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op, Addend);
+ Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32, Op);
+ Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
+ return DAG.getNode(ISD::MUL, dl, VT, Op, R);
+}
SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
// Lower the "add/sub/mul with overflow" instruction into a regular ins plus
@@ -7730,6 +7762,7 @@
case ISD::CTLZ: return LowerCTLZ(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op, DAG);
case ISD::MUL: return LowerMUL_V2I64(Op, DAG);
+ case ISD::SHL: return LowerSHL(Op, DAG);
case ISD::SADDO:
case ISD::UADDO:
case ISD::SSUBO:
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=109549&r1=109548&r2=109549&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Tue Jul 27 17:37:06 2010
@@ -723,6 +723,7 @@
SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSHL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
Added: llvm/trunk/test/CodeGen/X86/vec_shift4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_shift4.ll?rev=109549&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_shift4.ll (added)
+++ llvm/trunk/test/CodeGen/X86/vec_shift4.ll Tue Jul 27 17:37:06 2010
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86 -mattr=+sse41 | FileCheck %s
+
+define <2 x i64> @shl(<4 x i32> %r, <4 x i32> %a) nounwind readnone ssp {
+entry:
+; CHECK-NOT: shll
+; CHECK: pslld
+; CHECK: paddd
+; CHECK: cvttps2dq
+; CHECK: pmulld
+
+ %shl = shl <4 x i32> %r, %a ; <<4 x i32>> [#uses=1]
+ %tmp2 = bitcast <4 x i32> %shl to <2 x i64> ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %tmp2
+}
More information about the llvm-commits
mailing list