[llvm] r201558 - X86: use vpsllvd (& friends) for 16-bit shifts on Haswell
Tim Northover
tnorthover at apple.com
Tue Feb 18 03:15:33 PST 2014
Author: tnorthover
Date: Tue Feb 18 05:15:32 2014
New Revision: 201558
URL: http://llvm.org/viewvc/llvm-project?rev=201558&view=rev
Log:
X86: use vpsllvd (& friends) for 16-bit shifts on Haswell
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx2-shift.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=201558&r1=201557&r2=201558&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Feb 18 05:15:32 2014
@@ -13161,6 +13161,7 @@ static SDValue LowerShift(SDValue Op, co
Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
return DAG.getNode(ISD::MUL, dl, VT, Op, R);
}
+
if (VT == MVT::v16i8 && Op->getOpcode() == ISD::SHL) {
assert(Subtarget->hasSSE2() && "Need SSE2 for pslli/pcmpeq.");
@@ -13204,6 +13205,19 @@ static SDValue LowerShift(SDValue Op, co
return R;
}
+ // It's worth extending once and using the v8i32 shifts for 16-bit types, but
+ // the extra overheads to get from v16i8 to v8i32 make the existing SSE
+ // solution better.
+ if (Subtarget->hasInt256() && VT == MVT::v8i16) {
+ MVT NewVT = VT == MVT::v8i16 ? MVT::v8i32 : MVT::v16i16;
+ unsigned ExtOpc =
+ Op.getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ R = DAG.getNode(ExtOpc, dl, NewVT, R);
+ Amt = DAG.getNode(ISD::ANY_EXTEND, dl, NewVT, Amt);
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(Op.getOpcode(), dl, NewVT, R, Amt));
+ }
+
// Decompose 256-bit shifts into smaller 128-bit shifts.
if (VT.is256BitVector()) {
unsigned NumElems = VT.getVectorNumElements();
Modified: llvm/trunk/test/CodeGen/X86/avx2-shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-shift.ll?rev=201558&r1=201557&r2=201558&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-shift.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-shift.ll Tue Feb 18 05:15:32 2014
@@ -266,3 +266,36 @@ define <8 x i32> @sext_v8i32(<8 x i32> %
%c = sext <8 x i16> %b to <8 x i32>
ret <8 x i32> %c
}
+
+define <8 x i16> @variable_shl16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; CHECK-LABEL: variable_shl16:
+; CHECK-DAG: vpmovzxwd %xmm1, [[AMT:%ymm[0-9]+]]
+; CHECK-DAG: vpmovzxwd %xmm0, [[LHS:%ymm[0-9]+]]
+; CHECK: vpsllvd [[AMT]], [[LHS]], {{%ymm[0-9]+}}
+; CHECK: vpshufb
+; CHECK: vpermq
+ %res = shl <8 x i16> %lhs, %rhs
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @variable_ashr16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; CHECK-LABEL: variable_ashr16:
+; CHECK-DAG: vpmovzxwd %xmm1, [[AMT:%ymm[0-9]+]]
+; CHECK-DAG: vpmovsxwd %xmm0, [[LHS:%ymm[0-9]+]]
+; CHECK: vpsravd [[AMT]], [[LHS]], {{%ymm[0-9]+}}
+; CHECK: vpshufb
+; CHECK: vpermq
+ %res = ashr <8 x i16> %lhs, %rhs
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @variable_lshr16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; CHECK-LABEL: variable_lshr16:
+; CHECK-DAG: vpmovzxwd %xmm1, [[AMT:%ymm[0-9]+]]
+; CHECK-DAG: vpmovzxwd %xmm0, [[LHS:%ymm[0-9]+]]
+; CHECK: vpsrlvd [[AMT]], [[LHS]], {{%ymm[0-9]+}}
+; CHECK: vpshufb
+; CHECK: vpermq
+ %res = lshr <8 x i16> %lhs, %rhs
+ ret <8 x i16> %res
+}
\ No newline at end of file
More information about the llvm-commits
mailing list