[llvm] r336113 - [X86][SSE] Blend any v8i16/v4i32 shift with 2 shift unique values

Benjamin Kramer via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 3 04:20:11 PDT 2018


This crashes on some vector code. A reduced test case is attached, and the
commit has been reverted in r336189.

Reproduce with:
$ llc bugpoint-reduced-simplified.ll -mcpu=corei7-avx
llc: llvm/include/llvm/Support/Casting.h:92: static bool
llvm::isa_impl_cl<llvm::ConstantSDNode, llvm::SDNode *>::doit(const From *)
[To = llvm::ConstantSDNode, From = llvm::SDNode *]: Assertion `Val &&
"isa<> used on a null pointer"' failed.

On Mon, Jul 2, 2018 at 5:18 PM Simon Pilgrim via llvm-commits <
llvm-commits at lists.llvm.org> wrote:

> Author: rksimon
> Date: Mon Jul  2 08:14:07 2018
> New Revision: 336113
>
> URL: http://llvm.org/viewvc/llvm-project?rev=336113&view=rev
> Log:
> [X86][SSE] Blend any v8i16/v4i32 shift with 2 shift unique values
>
> We were only doing this for basic blends, despite shuffle lowering now
> being good enough to handle more complex blends. This means that the two
> v8i16 splat shifts are performed in parallel instead of serially, as in the
> general shift case.
>
> Modified:
>     llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>     llvm/trunk/test/CodeGen/X86/lower-vec-shift.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=336113&r1=336112&r2=336113&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Jul  2 08:14:07 2018
> @@ -23441,7 +23441,7 @@ static SDValue LowerShift(SDValue Op, co
>        return DAG.getNode(ISD::MUL, dl, VT, R, Scale);
>
>    // If possible, lower this shift as a sequence of two shifts by
> -  // constant plus a MOVSS/MOVSD/PBLEND instead of scalarizing it.
> +  // constant plus a BLENDing shuffle instead of scalarizing it.
>    // Example:
>    //   (v4i32 (srl A, (build_vector < X, Y, Y, Y>)))
>    //
> @@ -23449,64 +23449,39 @@ static SDValue LowerShift(SDValue Op, co
>    //   (v4i32 (MOVSS (srl A, <Y,Y,Y,Y>), (srl A, <X,X,X,X>)))
>    //
>    // The advantage is that the two shifts from the example would be
> -  // lowered as X86ISD::VSRLI nodes. This would be cheaper than
> scalarizing
> -  // the vector shift into four scalar shifts plus four pairs of vector
> -  // insert/extract.
> +  // lowered as X86ISD::VSRLI nodes in parallel before blending.
>    if (ConstantAmt && (VT == MVT::v8i16 || VT == MVT::v4i32)) {
> -    bool UseMOVSD = false;
> -    bool CanBeSimplified;
> -    // The splat value for the first packed shift (the 'X' from the
> example).
> -    SDValue Amt1 = Amt->getOperand(0);
> -    // The splat value for the second packed shift (the 'Y' from the
> example).
> -    SDValue Amt2 = (VT == MVT::v4i32) ? Amt->getOperand(1) :
> Amt->getOperand(2);
> -
> -    // See if it is possible to replace this node with a sequence of
> -    // two shifts followed by a MOVSS/MOVSD/PBLEND.
> -    if (VT == MVT::v4i32) {
> -      // Check if it is legal to use a MOVSS.
> -      CanBeSimplified = Amt2 == Amt->getOperand(2) &&
> -                        Amt2 == Amt->getOperand(3);
> -      if (!CanBeSimplified) {
> -        // Otherwise, check if we can still simplify this node using a
> MOVSD.
> -        CanBeSimplified = Amt1 == Amt->getOperand(1) &&
> -                          Amt->getOperand(2) == Amt->getOperand(3);
> -        UseMOVSD = true;
> -        Amt2 = Amt->getOperand(2);
> +    SDValue Amt1, Amt2;
> +    unsigned NumElts = VT.getVectorNumElements();
> +    SmallVector<int, 8> ShuffleMask;
> +    for (unsigned i = 0; i != NumElts; ++i) {
> +      SDValue A = Amt->getOperand(i);
> +      if (A.isUndef()) {
> +        ShuffleMask.push_back(SM_SentinelUndef);
> +        continue;
>        }
> -    } else {
> -      // Do similar checks for the case where the machine value type
> -      // is MVT::v8i16.
> -      CanBeSimplified = Amt1 == Amt->getOperand(1);
> -      for (unsigned i=3; i != 8 && CanBeSimplified; ++i)
> -        CanBeSimplified = Amt2 == Amt->getOperand(i);
> -
> -      if (!CanBeSimplified) {
> -        UseMOVSD = true;
> -        CanBeSimplified = true;
> -        Amt2 = Amt->getOperand(4);
> -        for (unsigned i=0; i != 4 && CanBeSimplified; ++i)
> -          CanBeSimplified = Amt1 == Amt->getOperand(i);
> -        for (unsigned j=4; j != 8 && CanBeSimplified; ++j)
> -          CanBeSimplified = Amt2 == Amt->getOperand(j);
> +      if (!Amt1 || Amt1 == A) {
> +        ShuffleMask.push_back(i);
> +        Amt1 = A;
> +        continue;
> +      }
> +      if (!Amt2 || Amt2 == A) {
> +        ShuffleMask.push_back(i + NumElts);
> +        Amt2 = A;
> +        continue;
>        }
> +      break;
>      }
>
> -    if (CanBeSimplified && isa<ConstantSDNode>(Amt1) &&
> +    if (ShuffleMask.size() == NumElts && isa<ConstantSDNode>(Amt1) &&
>          isa<ConstantSDNode>(Amt2)) {
> -      // Replace this node with two shifts followed by a
> MOVSS/MOVSD/PBLEND.
>        SDValue Splat1 =
>            DAG.getConstant(cast<ConstantSDNode>(Amt1)->getAPIntValue(),
> dl, VT);
>        SDValue Shift1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat1);
>        SDValue Splat2 =
>            DAG.getConstant(cast<ConstantSDNode>(Amt2)->getAPIntValue(),
> dl, VT);
>        SDValue Shift2 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat2);
> -      SDValue BitCast1 = DAG.getBitcast(MVT::v4i32, Shift1);
> -      SDValue BitCast2 = DAG.getBitcast(MVT::v4i32, Shift2);
> -      if (UseMOVSD)
> -        return DAG.getBitcast(VT, DAG.getVectorShuffle(MVT::v4i32, dl,
> BitCast1,
> -                                                       BitCast2, {0, 1,
> 6, 7}));
> -      return DAG.getBitcast(VT, DAG.getVectorShuffle(MVT::v4i32, dl,
> BitCast1,
> -                                                     BitCast2, {0, 5, 6,
> 7}));
> +      return DAG.getVectorShuffle(VT, dl, Shift1, Shift2, ShuffleMask);
>      }
>    }
>
>
> Modified: llvm/trunk/test/CodeGen/X86/lower-vec-shift.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lower-vec-shift.ll?rev=336113&r1=336112&r2=336113&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/lower-vec-shift.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/lower-vec-shift.ll Mon Jul  2 08:14:07 2018
> @@ -211,31 +211,21 @@ define <4 x i32> @test8(<4 x i32> %a) {
>  define <8 x i16> @test9(<8 x i16> %a) {
>  ; SSE-LABEL: test9:
>  ; SSE:       # %bb.0:
> -; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [65535,0,65535,65535,65535,0,0,0]
>  ; SSE-NEXT:    movdqa %xmm0, %xmm1
> -; SSE-NEXT:    pand %xmm2, %xmm1
> -; SSE-NEXT:    psraw $2, %xmm0
> -; SSE-NEXT:    pandn %xmm0, %xmm2
> -; SSE-NEXT:    por %xmm2, %xmm1
> -; SSE-NEXT:    psraw $1, %xmm1
> -; SSE-NEXT:    movdqa %xmm1, %xmm0
> +; SSE-NEXT:    psraw $3, %xmm1
> +; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [65535,0,65535,65535,65535,0,0,0]
> +; SSE-NEXT:    psraw $1, %xmm0
> +; SSE-NEXT:    pand %xmm2, %xmm0
> +; SSE-NEXT:    pandn %xmm1, %xmm2
> +; SSE-NEXT:    por %xmm2, %xmm0
>  ; SSE-NEXT:    retq
>  ;
> -; AVX1-LABEL: test9:
> -; AVX1:       # %bb.0:
> -; AVX1-NEXT:    vpsraw $2, %xmm0, %xmm1
> -; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 =
> xmm0[0],xmm1[1],xmm0[2,3,4],xmm1[5,6,7]
> -; AVX1-NEXT:    vpsraw $1, %xmm0, %xmm0
> -; AVX1-NEXT:    retq
> -;
> -; AVX2-LABEL: test9:
> -; AVX2:       # %bb.0:
> -; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
> -; AVX2-NEXT:    vpsravd {{.*}}(%rip), %ymm0, %ymm0
> -; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
> -; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
> -; AVX2-NEXT:    vzeroupper
> -; AVX2-NEXT:    retq
> +; AVX-LABEL: test9:
> +; AVX:       # %bb.0:
> +; AVX-NEXT:    vpsraw $3, %xmm0, %xmm1
> +; AVX-NEXT:    vpsraw $1, %xmm0, %xmm0
> +; AVX-NEXT:    vpblendw {{.*#+}} xmm0 =
> xmm0[0],xmm1[1],xmm0[2,3,4],xmm1[5,6,7]
> +; AVX-NEXT:    retq
>    %lshr = ashr <8 x i16> %a, <i16 1, i16 3, i16 1, i16 1, i16 1, i16 3,
> i16 3, i16 3>
>    ret <8 x i16> %lshr
>  }
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180703/0d2d4d6a/attachment.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: bugpoint-reduced-simplified.ll
Type: application/octet-stream
Size: 3145 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180703/0d2d4d6a/attachment.obj>


More information about the llvm-commits mailing list