[llvm] r352883 - [X86][SSE] Use PSLLDQ/PSRLDQ to mask out zeroable ends of a shuffle
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 1 08:02:13 PST 2019
Author: rksimon
Date: Fri Feb 1 08:02:12 2019
New Revision: 352883
URL: http://llvm.org/viewvc/llvm-project?rev=352883&view=rev
Log:
[X86][SSE] Use PSLLDQ/PSRLDQ to mask out zeroable ends of a shuffle
As suggested on PR40318, this patch uses PSLLDQ/PSRLDQ to lower shuffles to zero out the ends of a vector, leaving a sequential inner section.
For pre-SSSE3 we do this for shuffles with zeros at either end (requiring up to 3 shifts), but once PSHUFB is available I've limited this to shuffles with a single zeroable end (2 shifts).
Differential Revision: https://reviews.llvm.org/D56784
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/buildvec-extract.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-sse4a.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=352883&r1=352882&r2=352883&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Feb 1 08:02:12 2019
@@ -10935,6 +10935,69 @@ static SDValue lowerShuffleAsRotate(cons
DAG.getConstant(Rotation, DL, MVT::i8));
}
+/// Try to lower a vector shuffle as a byte shift sequence.
+static SDValue lowerVectorShuffleAsByteShiftMask(
+ const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
+ const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) {
+ assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");
+ assert(VT.is128BitVector() && "Only 128-bit vectors supported");
+
+ // We need a shuffle that has zeros at one/both ends and a sequential
+ // shuffle from one source within.
+ unsigned ZeroLo = Zeroable.countTrailingOnes();
+ unsigned ZeroHi = Zeroable.countLeadingOnes();
+ if (!ZeroLo && !ZeroHi)
+ return SDValue();
+
+ unsigned NumElts = Mask.size();
+ unsigned Len = NumElts - (ZeroLo + ZeroHi);
+ if (!isSequentialOrUndefInRange(Mask, ZeroLo, Len, Mask[ZeroLo]))
+ return SDValue();
+
+ unsigned Scale = VT.getScalarSizeInBits() / 8;
+ ArrayRef<int> StubMask = Mask.slice(ZeroLo, Len);
+ if (!isUndefOrInRange(StubMask, 0, NumElts) &&
+ !isUndefOrInRange(StubMask, NumElts, 2 * NumElts))
+ return SDValue();
+
+ SDValue Res = Mask[ZeroLo] < (int)NumElts ? V1 : V2;
+ Res = DAG.getBitcast(MVT::v16i8, Res);
+
+ // Use VSHLDQ/VSRLDQ ops to zero the ends of a vector and leave an
+ // inner sequential set of elements, possibly offset:
+ // 01234567 --> zzzzzz01 --> 1zzzzzzz
+ // 01234567 --> 4567zzzz --> zzzzz456
+ // 01234567 --> z0123456 --> 3456zzzz --> zz3456zz
+ if (ZeroLo == 0) {
+ unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts);
+ Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
+ DAG.getConstant(Scale * Shift, DL, MVT::i8));
+ Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res,
+ DAG.getConstant(Scale * ZeroHi, DL, MVT::i8));
+ } else if (ZeroHi == 0) {
+ unsigned Shift = Mask[ZeroLo] % NumElts;
+ Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res,
+ DAG.getConstant(Scale * Shift, DL, MVT::i8));
+ Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
+ DAG.getConstant(Scale * ZeroLo, DL, MVT::i8));
+ } else if (!Subtarget.hasSSSE3()) {
+ // If we don't have PSHUFB then its worth avoiding an AND constant mask
+ // by performing 3 byte shifts. Shuffle combining can kick in above that.
+ // TODO: There may be some cases where VSH{LR}DQ+PAND is still better.
+ unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts);
+ Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
+ DAG.getConstant(Scale * Shift, DL, MVT::i8));
+ Shift += Mask[ZeroLo] % NumElts;
+ Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res,
+ DAG.getConstant(Scale * Shift, DL, MVT::i8));
+ Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
+ DAG.getConstant(Scale * ZeroLo, DL, MVT::i8));
+ } else
+ return SDValue();
+
+ return DAG.getBitcast(VT, Res);
+}
+
/// Try to lower a vector shuffle as a bit shift (shifts in zeros).
///
/// Attempts to match a shuffle mask against the PSLL(W/D/Q/DQ) and
@@ -13339,6 +13402,11 @@ static SDValue lowerV8I16Shuffle(const S
lowerShuffleAsBitBlend(DL, MVT::v8i16, V1, V2, Mask, DAG))
return BitBlend;
+ // Try to use byte shift instructions to mask.
+ if (SDValue V = lowerVectorShuffleAsByteShiftMask(
+ DL, MVT::v8i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
+ return V;
+
// Try to lower by permuting the inputs into an unpack instruction.
if (SDValue Unpack = lowerShuffleAsPermuteAndUnpack(DL, MVT::v8i16, V1, V2,
Mask, Subtarget, DAG))
@@ -13588,6 +13656,11 @@ static SDValue lowerV16I8Shuffle(const S
if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG))
return V;
+ // Try to use byte shift instructions to mask.
+ if (SDValue V = lowerVectorShuffleAsByteShiftMask(
+ DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
+ return V;
+
// Check for SSSE3 which lets us lower all v16i8 shuffles much more directly
// with PSHUFB. It is important to do this before we attempt to generate any
// blends but after all of the single-input lowerings. If the single input
Modified: llvm/trunk/test/CodeGen/X86/buildvec-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/buildvec-extract.ll?rev=352883&r1=352882&r2=352883&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/buildvec-extract.ll (original)
+++ llvm/trunk/test/CodeGen/X86/buildvec-extract.ll Fri Feb 1 08:02:12 2019
@@ -402,24 +402,16 @@ define <2 x i64> @extract0_i16_zext_inse
}
define <2 x i64> @extract1_i16_zext_insert0_i64_undef(<8 x i16> %x) {
-; SSE2-LABEL: extract1_i16_zext_insert0_i64_undef:
-; SSE2: # %bb.0:
-; SSE2-NEXT: psrld $16, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: extract1_i16_zext_insert0_i64_undef:
-; SSE41: # %bb.0:
-; SSE41-NEXT: psrld $16, %xmm0
-; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
-; SSE41-NEXT: retq
+; SSE-LABEL: extract1_i16_zext_insert0_i64_undef:
+; SSE: # %bb.0:
+; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
+; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE-NEXT: retq
;
; AVX-LABEL: extract1_i16_zext_insert0_i64_undef:
; AVX: # %bb.0:
-; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
-; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
+; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
%e = extractelement <8 x i16> %x, i32 1
%z = zext i16 %e to i64
@@ -446,24 +438,16 @@ define <2 x i64> @extract1_i16_zext_inse
}
define <2 x i64> @extract2_i16_zext_insert0_i64_undef(<8 x i16> %x) {
-; SSE2-LABEL: extract2_i16_zext_insert0_i64_undef:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: extract2_i16_zext_insert0_i64_undef:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
-; SSE41-NEXT: retq
+; SSE-LABEL: extract2_i16_zext_insert0_i64_undef:
+; SSE: # %bb.0:
+; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
+; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE-NEXT: retq
;
; AVX-LABEL: extract2_i16_zext_insert0_i64_undef:
; AVX: # %bb.0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
+; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
%e = extractelement <8 x i16> %x, i32 2
%z = zext i16 %e to i64
@@ -526,8 +510,9 @@ define <2 x i64> @extract3_i16_zext_inse
define <2 x i64> @extract0_i16_zext_insert1_i64_undef(<8 x i16> %x) {
; SSE2-LABEL: extract0_i16_zext_insert1_i64_undef:
; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
+; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; SSE2-NEXT: retq
;
; SSE41-LABEL: extract0_i16_zext_insert1_i64_undef:
@@ -615,8 +600,9 @@ define <2 x i64> @extract1_i16_zext_inse
define <2 x i64> @extract2_i16_zext_insert1_i64_undef(<8 x i16> %x) {
; SSE2-LABEL: extract2_i16_zext_insert1_i64_undef:
; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
-; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
+; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; SSE2-NEXT: retq
;
; SSE41-LABEL: extract2_i16_zext_insert1_i64_undef:
@@ -661,8 +647,9 @@ define <2 x i64> @extract2_i16_zext_inse
define <2 x i64> @extract3_i16_zext_insert1_i64_undef(<8 x i16> %x) {
; SSE2-LABEL: extract3_i16_zext_insert1_i64_undef:
; SSE2: # %bb.0:
-; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
-; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; SSE2-NEXT: retq
;
; SSE41-LABEL: extract3_i16_zext_insert1_i64_undef:
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll?rev=352883&r1=352882&r2=352883&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll Fri Feb 1 08:02:12 2019
@@ -1535,19 +1535,9 @@ define <16 x i8> @shuffle_v16i8_07_zz_zz
define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz(<16 x i8> %a) {
; SSE2-LABEL: shuffle_v16i8_zz_zz_zz_zz_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz:
; SSE2: # %bb.0:
-; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,6,5,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,0,3]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,2,0,0,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,2,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4]
-; SSE2-NEXT: packuswb %xmm1, %xmm0
+; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
+; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v16i8_zz_zz_zz_zz_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz:
@@ -1569,70 +1559,65 @@ define <16 x i8> @shuffle_v16i8_zz_zz_zz
}
define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(<16 x i8> %a) {
-; SSE2-LABEL: shuffle_v16i8_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,0,2,3,4,5,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,4]
-; SSE2-NEXT: packuswb %xmm1, %xmm0
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: shuffle_v16i8_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; SSSE3: # %bb.0:
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; SSSE3-NEXT: retq
-;
-; SSE41-LABEL: shuffle_v16i8_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: shuffle_v16i8_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; AVX: # %bb.0:
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX-NEXT: retq
+; SSE-LABEL: shuffle_v16i8_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; SSE: # %bb.0:
+; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
+; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: shuffle_v16i8_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
+; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: shuffle_v16i8_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
+; AVX2-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shuffle_v16i8_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX2-FAST-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v16i8_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
}
define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_01_02_03_04_05_06(<16 x i8> %a) {
-; SSE2-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_01_02_03_04_05_06:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,0,2,3,4,5,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,2,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,5,6]
-; SSE2-NEXT: packuswb %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_01_02_03_04_05_06:
-; SSSE3: # %bb.0:
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[1,2,3,4,5,6]
-; SSSE3-NEXT: retq
-;
-; SSE41-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_01_02_03_04_05_06:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[1,2,3,4,5,6]
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_01_02_03_04_05_06:
-; AVX: # %bb.0:
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[1,2,3,4,5,6]
-; AVX-NEXT: retq
+; SSE-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_01_02_03_04_05_06:
+; SSE: # %bb.0:
+; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
+; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_01_02_03_04_05_06:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
+; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_01_02_03_04_05_06:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
+; AVX2-SLOW-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_01_02_03_04_05_06:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[1,2,3,4,5,6]
+; AVX2-FAST-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_01_02_03_04_05_06:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[1,2,3,4,5,6]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
ret <16 x i8> %shuffle
}
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll?rev=352883&r1=352882&r2=352883&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll Fri Feb 1 08:02:12 2019
@@ -2477,92 +2477,99 @@ define <8 x i16> @shuffle_v8i16_8012345u
; PR40306
define <8 x i16> @shuffle_v8i16_9zzzuuuu(<8 x i16> %x) {
-; SSE2-LABEL: shuffle_v8i16_9zzzuuuu:
-; SSE2: # %bb.0:
-; SSE2-NEXT: psrld $16, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: shuffle_v8i16_9zzzuuuu:
-; SSSE3: # %bb.0:
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
-; SSSE3-NEXT: retq
-;
-; SSE41-LABEL: shuffle_v8i16_9zzzuuuu:
-; SSE41: # %bb.0:
-; SSE41-NEXT: psrld $16, %xmm0
-; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: shuffle_v8i16_9zzzuuuu:
-; AVX: # %bb.0:
-; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
-; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
-; AVX-NEXT: retq
+; SSE-LABEL: shuffle_v8i16_9zzzuuuu:
+; SSE: # %bb.0:
+; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
+; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: shuffle_v8i16_9zzzuuuu:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
+; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: shuffle_v8i16_9zzzuuuu:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
+; AVX2-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shuffle_v8i16_9zzzuuuu:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX2-FAST-NEXT: retq
+;
+; AVX512VL-SLOW-LABEL: shuffle_v8i16_9zzzuuuu:
+; AVX512VL-SLOW: # %bb.0:
+; AVX512VL-SLOW-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
+; AVX512VL-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-SLOW-NEXT: retq
+;
+; AVX512VL-FAST-LABEL: shuffle_v8i16_9zzzuuuu:
+; AVX512VL-FAST: # %bb.0:
+; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-FAST-NEXT: retq
%r = shufflevector <8 x i16> zeroinitializer, <8 x i16> %x, <8 x i32> <i32 9, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %r
}
; PR40318
define <8 x i16> @shuffle_v8i16_2zzzuuuu(<8 x i16> %x) {
-; SSE2-LABEL: shuffle_v8i16_2zzzuuuu:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: shuffle_v8i16_2zzzuuuu:
-; SSSE3: # %bb.0:
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
-; SSSE3-NEXT: retq
-;
-; SSE41-LABEL: shuffle_v8i16_2zzzuuuu:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: shuffle_v8i16_2zzzuuuu:
-; AVX: # %bb.0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
-; AVX-NEXT: retq
+; SSE-LABEL: shuffle_v8i16_2zzzuuuu:
+; SSE: # %bb.0:
+; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
+; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: shuffle_v8i16_2zzzuuuu:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
+; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: shuffle_v8i16_2zzzuuuu:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
+; AVX2-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shuffle_v8i16_2zzzuuuu:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX2-FAST-NEXT: retq
+;
+; AVX512VL-SLOW-LABEL: shuffle_v8i16_2zzzuuuu:
+; AVX512VL-SLOW: # %bb.0:
+; AVX512VL-SLOW-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
+; AVX512VL-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-SLOW-NEXT: retq
+;
+; AVX512VL-FAST-LABEL: shuffle_v8i16_2zzzuuuu:
+; AVX512VL-FAST: # %bb.0:
+; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-FAST-NEXT: retq
%r = shufflevector <8 x i16> %x, <8 x i16> zeroinitializer, <8 x i32> <i32 2, i32 9, i32 10, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %r
}
define <8 x i16> @shuffle_v8i16_3uu6zzzz(<8 x i16> %x) {
-; SSE2-LABEL: shuffle_v8i16_3uu6zzzz:
-; SSE2: # %bb.0:
-; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
-; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: shuffle_v8i16_3uu6zzzz:
-; SSSE3: # %bb.0:
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7,u,u,u,u,12,13],zero,zero,zero,zero,zero,zero,zero,zero
-; SSSE3-NEXT: retq
-;
-; SSE41-LABEL: shuffle_v8i16_3uu6zzzz:
-; SSE41: # %bb.0:
-; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
-; SSE41-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
-; SSE41-NEXT: retq
+; SSE-LABEL: shuffle_v8i16_3uu6zzzz:
+; SSE: # %bb.0:
+; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
+; SSE-NEXT: retq
;
; AVX1-LABEL: shuffle_v8i16_3uu6zzzz:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
-; AVX1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: shuffle_v8i16_3uu6zzzz:
; AVX2-SLOW: # %bb.0:
-; AVX2-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
-; AVX2-SLOW-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX2-SLOW-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; AVX2-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: shuffle_v8i16_3uu6zzzz:
@@ -2572,8 +2579,8 @@ define <8 x i16> @shuffle_v8i16_3uu6zzzz
;
; AVX512VL-SLOW-LABEL: shuffle_v8i16_3uu6zzzz:
; AVX512VL-SLOW: # %bb.0:
-; AVX512VL-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
-; AVX512VL-SLOW-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512VL-SLOW-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; AVX512VL-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v8i16_3uu6zzzz:
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-sse4a.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-sse4a.ll?rev=352883&r1=352882&r2=352883&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-sse4a.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-sse4a.ll Fri Feb 1 08:02:12 2019
@@ -410,8 +410,9 @@ define <16 x i8> @shuffle_uu_0_5_uu_uu_u
define <16 x i8> @shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) {
; AMD10H-LABEL: shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; AMD10H: # %bb.0:
-; AMD10H-NEXT: psrlq $16, %xmm0
-; AMD10H-NEXT: pand {{.*}}(%rip), %xmm0
+; AMD10H-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4]
+; AMD10H-NEXT: psrldq {{.*#+}} xmm0 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AMD10H-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; AMD10H-NEXT: retq
;
; BTVER1-LABEL: shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
More information about the llvm-commits
mailing list