[llvm] r223660 - [X86] Improved lowering of packed v8i16 vector shifts by non-constant count.
Andrea Di Biagio
Andrea_DiBiagio at sn.scee.net
Mon Dec 8 06:36:51 PST 2014
Author: adibiagio
Date: Mon Dec 8 08:36:51 2014
New Revision: 223660
URL: http://llvm.org/viewvc/llvm-project?rev=223660&view=rev
Log:
[X86] Improved lowering of packed v8i16 vector shifts by non-constant count.
Before this patch, the backend sub-optimally expanded the non-constant shift
count of a v8i16 shift into a sequence of two 'movd' plus 'movzwl'.
With this patch the backend checks if the target features sse4.1. If so, then
it lets the shuffle legalizer deal with the expansion of the shift amount.
Example:
;;
define <8 x i16> @test(<8 x i16> %A, <8 x i16> %B) {
%shamt = shufflevector <8 x i16> %B, <8 x i16> undef, <8 x i32> zeroinitializer
%shl = shl <8 x i16> %A, %shamt
ret <8 x i16> %shl
}
;;
Before (with -mattr=+avx):
vmovd %xmm1, %eax
movzwl %ax, %eax
vmovd %eax, %xmm1
vpsllw %xmm1, %xmm0, %xmm0
retq
Now:
vpxor %xmm2, %xmm2, %xmm2
vpblendw $1, %xmm1, %xmm2, %xmm1
vpsllw %xmm1, %xmm0, %xmm0
retq
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/lower-vec-shift-2.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=223660&r1=223659&r2=223660&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Dec 8 08:36:51 2014
@@ -16720,18 +16720,28 @@ static SDValue getTargetVShiftNode(unsig
case X86ISD::VSRAI: Opc = X86ISD::VSRA; break;
}
- // Need to build a vector containing shift amount.
- // SSE/AVX packed shifts only use the lower 64-bit of the shift count.
- SmallVector<SDValue, 4> ShOps;
- ShOps.push_back(ShAmt);
- if (SVT == MVT::i32) {
- ShOps.push_back(DAG.getConstant(0, SVT));
+ const X86Subtarget &Subtarget =
+ DAG.getTarget().getSubtarget<X86Subtarget>();
+ if (Subtarget.hasSSE41() && ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
+ ShAmt.getOperand(0).getSimpleValueType() == MVT::i16) {
+ // Let the shuffle legalizer expand this shift amount node.
+ SDValue Op0 = ShAmt.getOperand(0);
+ Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(Op0), MVT::v8i16, Op0);
+ ShAmt = getShuffleVectorZeroOrUndef(Op0, 0, true, &Subtarget, DAG);
+ } else {
+ // Need to build a vector containing shift amount.
+ // SSE/AVX packed shifts only use the lower 64-bit of the shift count.
+ SmallVector<SDValue, 4> ShOps;
+ ShOps.push_back(ShAmt);
+ if (SVT == MVT::i32) {
+ ShOps.push_back(DAG.getConstant(0, SVT));
+ ShOps.push_back(DAG.getUNDEF(SVT));
+ }
ShOps.push_back(DAG.getUNDEF(SVT));
- }
- ShOps.push_back(DAG.getUNDEF(SVT));
- MVT BVT = SVT == MVT::i32 ? MVT::v4i32 : MVT::v2i64;
- ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, BVT, ShOps);
+ MVT BVT = SVT == MVT::i32 ? MVT::v4i32 : MVT::v2i64;
+ ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, BVT, ShOps);
+ }
// The return type has to be a 128-bit type with the same element
// type as the input type.
Modified: llvm/trunk/test/CodeGen/X86/lower-vec-shift-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lower-vec-shift-2.ll?rev=223660&r1=223659&r2=223660&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/lower-vec-shift-2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/lower-vec-shift-2.ll Mon Dec 8 08:36:51 2014
@@ -11,9 +11,8 @@ define <8 x i16> @test1(<8 x i16> %A, <8
; SSE2-NEXT: retq
; AVX-LABEL: test1:
; AVX: # BB#0
-; AVX-NEXT: vmovd %xmm1, %eax
-; AVX-NEXT: movzwl %ax, %eax
-; AVX-NEXT: vmovd %eax, %xmm1
+; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
; AVX-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
@@ -66,9 +65,8 @@ define <8 x i16> @test4(<8 x i16> %A, <8
; SSE2-NEXT: retq
; AVX-LABEL: test4:
; AVX: # BB#0
-; AVX-NEXT: vmovd %xmm1, %eax
-; AVX-NEXT: movzwl %ax, %eax
-; AVX-NEXT: vmovd %eax, %xmm1
+; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
; AVX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
@@ -121,9 +119,8 @@ define <8 x i16> @test7(<8 x i16> %A, <8
; SSE2-NEXT: retq
; AVX-LABEL: test7:
; AVX: # BB#0
-; AVX-NEXT: vmovd %xmm1, %eax
-; AVX-NEXT: movzwl %ax, %eax
-; AVX-NEXT: vmovd %eax, %xmm1
+; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
; AVX-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
More information about the llvm-commits
mailing list