[llvm] r272471 - [X86][SSE] Added PSLLDQ/PSRLDQ as a target shuffle type

Sat Jun 11 16:59:00 PDT 2016

Nice!

-- Sean Silva

On Sat, Jun 11, 2016 at 6:38 AM, Simon Pilgrim via llvm-commits <
llvm-commits at lists.llvm.org> wrote:

> Author: rksimon
> Date: Sat Jun 11 08:38:28 2016
> New Revision: 272471
>
> URL: http://llvm.org/viewvc/llvm-project?rev=272471&view=rev
> Log:
> [X86][SSE] Added PSLLDQ/PSRLDQ as a target shuffle type
>
> Ensure that PALIGNR/PSLLDQ/PSRLDQ are byte vectors so that they can be
> correctly decoded for target shuffle combining
>
> Modified:
>     llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>     llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
>     llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
>     llvm/trunk/test/CodeGen/X86/vector-zext.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=272471&r1=272470&r2=272471&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Jun 11 08:38:28 2016
> @@ -3806,6 +3806,8 @@ static bool isTargetShuffle(unsigned Opc
>    case X86ISD::SHUFP:
>    case X86ISD::INSERTPS:
>    case X86ISD::PALIGNR:
> +  case X86ISD::VSHLDQ:
> +  case X86ISD::VSRLDQ:
>    case X86ISD::MOVLHPS:
>    case X86ISD::MOVLHPD:
>    case X86ISD::MOVHLPS:
> @@ -4878,9 +4880,22 @@ static bool getTargetShuffleMask(SDNode
>      IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
>      break;
>    case X86ISD::PALIGNR:
> +    assert(VT.getScalarType() == MVT::i8 && "Byte vector expected");
>      ImmN = N->getOperand(N->getNumOperands()-1);
>      DecodePALIGNRMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
>      break;
> +  case X86ISD::VSHLDQ:
> +    assert(VT.getScalarType() == MVT::i8 && "Byte vector expected");
> +    ImmN = N->getOperand(N->getNumOperands() - 1);
> +    DecodePSLLDQMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
> +    IsUnary = true;
> +    break;
> +  case X86ISD::VSRLDQ:
> +    assert(VT.getScalarType() == MVT::i8 && "Byte vector expected");
> +    ImmN = N->getOperand(N->getNumOperands() - 1);
> +    DecodePSRLDQMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
> +    IsUnary = true;
> +    break;
>    case X86ISD::PSHUFD:
>    case X86ISD::VPERMILPI:
>      ImmN = N->getOperand(N->getNumOperands()-1);
> @@ -30175,6 +30190,8 @@ SDValue X86TargetLowering::PerformDAGCom
>    case X86ISD::SHUFP:       // Handle all target specific shuffles
>    case X86ISD::INSERTPS:
>    case X86ISD::PALIGNR:
> +  case X86ISD::VSHLDQ:
> +  case X86ISD::VSRLDQ:
>    case X86ISD::BLENDI:
>    case X86ISD::UNPCKH:
>    case X86ISD::UNPCKL:
>
> Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll?rev=272471&r1=272470&r2=272471&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll Sat Jun 11 08:38:28 2016
> @@ -8,8 +8,7 @@ declare <32 x i8> @llvm.x86.avx2.pshuf.b
>  define <32 x i8> @combine_pshufb_pslldq(<32 x i8> %a0) {
>  ; CHECK-LABEL: combine_pshufb_pslldq:
>  ; CHECK:       # BB#0:
> -; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 =
> zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23]
> -; CHECK-NEXT:    vpslldq {{.*#+}} ymm0 =
> zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23]
> +; CHECK-NEXT:    vxorps %ymm0, %ymm0, %ymm0
>  ; CHECK-NEXT:    retq
>    %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x
> i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0,
> i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 128, i8 128, i8 128, i8 128,
> i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6,
> i8 7>)
>    %2 = shufflevector <32 x i8> %1, <32 x i8> zeroinitializer, <32 x i32>
> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 0, i32
> 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 32, i32 32, i32 32, i32
> 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 20,
> i32 21, i32 22, i32 23>
> @@ -19,8 +18,7 @@ define <32 x i8> @combine_pshufb_pslldq(
>  define <32 x i8> @combine_pshufb_psrldq(<32 x i8> %a0) {
>  ; CHECK-LABEL: combine_pshufb_psrldq:
>  ; CHECK:       # BB#0:
> -; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 =
> ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
> -; CHECK-NEXT:    vpsrldq {{.*#+}} ymm0 =
> ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
> +; CHECK-NEXT:    vxorps %ymm0, %ymm0, %ymm0
>  ; CHECK-NEXT:    retq
>    %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x
> i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 128, i8 128,
> i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 8, i8 9, i8 10, i8 11,
> i8 12, i8 13, i8 14, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128,
> i8 128, i8 128>)
>    %2 = shufflevector <32 x i8> %1, <32 x i8> zeroinitializer, <32 x i32>
> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32
> 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 24, i32 25, i32 26,
> i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 32, i32 32, i32 32, i32
> 32, i32 32, i32 32, i32 32>
>
> Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll?rev=272471&r1=272470&r2=272471&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll Sat Jun 11 08:38:28 2016
> @@ -110,14 +110,12 @@ define <16 x i8> @combine_pshufb_palignr
>  define <16 x i8> @combine_pshufb_pslldq(<16 x i8> %a0) {
>  ; SSE-LABEL: combine_pshufb_pslldq:
>  ; SSE:       # BB#0:
> -; SSE-NEXT:    pshufb {{.*#+}} xmm0 =
> zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
> -; SSE-NEXT:    pslldq {{.*#+}} xmm0 =
> zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
> +; SSE-NEXT:    xorps %xmm0, %xmm0
>  ; SSE-NEXT:    retq
>  ;
>  ; AVX-LABEL: combine_pshufb_pslldq:
>  ; AVX:       # BB#0:
> -; AVX-NEXT:    vpshufb {{.*#+}} xmm0 =
> zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
> -; AVX-NEXT:    vpslldq {{.*#+}} xmm0 =
> zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
> +; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
>  ; AVX-NEXT:    retq
>    %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16
> x i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8
> 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
>    %2 = shufflevector <16 x i8> %1, <16 x i8> zeroinitializer, <16 x i32>
> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32
> 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
> @@ -127,14 +125,12 @@ define <16 x i8> @combine_pshufb_pslldq(
>  define <16 x i8> @combine_pshufb_psrldq(<16 x i8> %a0) {
>  ; SSE-LABEL: combine_pshufb_psrldq:
>  ; SSE:       # BB#0:
> -; SSE-NEXT:    pshufb {{.*#+}} xmm0 =
> xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
> -; SSE-NEXT:    psrldq {{.*#+}} xmm0 =
> xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
> +; SSE-NEXT:    xorps %xmm0, %xmm0
>  ; SSE-NEXT:    retq
>  ;
>  ; AVX-LABEL: combine_pshufb_psrldq:
>  ; AVX:       # BB#0:
> -; AVX-NEXT:    vpshufb {{.*#+}} xmm0 =
> xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
> -; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 =
> xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
> +; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
>  ; AVX-NEXT:    retq
>    %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16
> x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 128, i8
> 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>)
>    %2 = shufflevector <16 x i8> %1, <16 x i8> zeroinitializer, <16 x i32>
> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32
> 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
>
> Modified: llvm/trunk/test/CodeGen/X86/vector-zext.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-zext.ll?rev=272471&r1=272470&r2=272471&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vector-zext.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vector-zext.ll Sat Jun 11 08:38:28 2016
> @@ -1378,10 +1378,7 @@ define <2 x i64> @shuf_zext_8i16_to_2i64
>  ;
>  ; SSSE3-LABEL: shuf_zext_8i16_to_2i64_offset6:
>  ; SSSE3:       # BB#0: # %entry
> -; SSSE3-NEXT:    psrldq {{.*#+}} xmm0 =
> xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
> -; SSSE3-NEXT:    pxor %xmm1, %xmm1
> -; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 =
> xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
> -; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
> +; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 =
> xmm0[6,7],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero
>  ; SSSE3-NEXT:    retq
>  ;
>  ; SSE41-LABEL: shuf_zext_8i16_to_2i64_offset6:
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160611/44e484d3/attachment.html>