[llvm] r322644 - [X86] Don't mutate shuffle arguments after early-out for AVX512

Hans Wennborg via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 18 03:39:03 PST 2018


Merged to 6.0 in r322840.

On Wed, Jan 17, 2018 at 2:01 PM, Benjamin Kramer via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
> Author: d0k
> Date: Wed Jan 17 05:01:06 2018
> New Revision: 322644
>
> URL: http://llvm.org/viewvc/llvm-project?rev=322644&view=rev
> Log:
> [X86] Don't mutate shuffle arguments after early-out for AVX512
>
> The match* functions have the annoying behavior of modifying their inputs.
> Save and restore the inputs, just in case the early out for AVX512 is
> hit. This is still not great and it's only a matter of time before this kind
> of bug happens again, but I couldn't come up with a better pattern without
> rewriting significant chunks of this code. Fixes PR35977.
>
> Modified:
>     llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>     llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=322644&r1=322643&r2=322644&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Jan 17 05:01:06 2018
> @@ -28588,13 +28588,14 @@ static SDValue combineX86ShuffleChain(Ar
>        }
>      }
>
> +    SDValue NewV1 = V1; // Save operand in case early exit happens.
>      if (matchUnaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
> -                                V1, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
> -                                ShuffleVT) &&
> +                                NewV1, DL, DAG, Subtarget, Shuffle,
> +                                ShuffleSrcVT, ShuffleVT) &&
>          (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
>        if (Depth == 1 && Root.getOpcode() == Shuffle)
>          return SDValue(); // Nothing to do!
> -      Res = DAG.getBitcast(ShuffleSrcVT, V1);
> +      Res = DAG.getBitcast(ShuffleSrcVT, NewV1);
>        DCI.AddToWorklist(Res.getNode());
>        Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res);
>        DCI.AddToWorklist(Res.getNode());
> @@ -28616,33 +28617,36 @@ static SDValue combineX86ShuffleChain(Ar
>      }
>    }
>
> +  SDValue NewV1 = V1; // Save operands in case early exit happens.
> +  SDValue NewV2 = V2;
>    if (matchBinaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
> -                               V1, V2, DL, DAG, Subtarget, Shuffle,
> +                               NewV1, NewV2, DL, DAG, Subtarget, Shuffle,
>                                 ShuffleSrcVT, ShuffleVT, UnaryShuffle) &&
>        (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
>      if (Depth == 1 && Root.getOpcode() == Shuffle)
>        return SDValue(); // Nothing to do!
> -    V1 = DAG.getBitcast(ShuffleSrcVT, V1);
> -    DCI.AddToWorklist(V1.getNode());
> -    V2 = DAG.getBitcast(ShuffleSrcVT, V2);
> -    DCI.AddToWorklist(V2.getNode());
> -    Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2);
> +    NewV1 = DAG.getBitcast(ShuffleSrcVT, NewV1);
> +    DCI.AddToWorklist(NewV1.getNode());
> +    NewV2 = DAG.getBitcast(ShuffleSrcVT, NewV2);
> +    DCI.AddToWorklist(NewV2.getNode());
> +    Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2);
>      DCI.AddToWorklist(Res.getNode());
>      return DAG.getBitcast(RootVT, Res);
>    }
>
> -  if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
> -                                      AllowIntDomain, V1, V2, DL, DAG,
> -                                      Subtarget, Shuffle, ShuffleVT,
> -                                      PermuteImm) &&
> +  NewV1 = V1; // Save operands in case early exit happens.
> +  NewV2 = V2;
> +  if (matchBinaryPermuteVectorShuffle(
> +          MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, NewV1,
> +          NewV2, DL, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm) &&
>        (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
>      if (Depth == 1 && Root.getOpcode() == Shuffle)
>        return SDValue(); // Nothing to do!
> -    V1 = DAG.getBitcast(ShuffleVT, V1);
> -    DCI.AddToWorklist(V1.getNode());
> -    V2 = DAG.getBitcast(ShuffleVT, V2);
> -    DCI.AddToWorklist(V2.getNode());
> -    Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2,
> +    NewV1 = DAG.getBitcast(ShuffleVT, NewV1);
> +    DCI.AddToWorklist(NewV1.getNode());
> +    NewV2 = DAG.getBitcast(ShuffleVT, NewV2);
> +    DCI.AddToWorklist(NewV2.getNode());
> +    Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2,
>                        DAG.getConstant(PermuteImm, DL, MVT::i8));
>      DCI.AddToWorklist(Res.getNode());
>      return DAG.getBitcast(RootVT, Res);
>
> Modified: llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll?rev=322644&r1=322643&r2=322644&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll Wed Jan 17 05:01:06 2018
> @@ -4788,3 +4788,43 @@ define <2 x double> @test_masked_z_8xdou
>    ret <2 x double> %res
>  }
>
> +; PR35977
> +define void @test_zext_v8i8_to_v8i16(<8 x i8>* %arg, <8 x i16>* %arg1) {
> +; CHECK-LABEL: test_zext_v8i8_to_v8i16:
> +; CHECK:       # %bb.0:
> +; CHECK-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
> +; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
> +; CHECK-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
> +; CHECK-NEXT:    vmovdqa %xmm0, (%rsi)
> +; CHECK-NEXT:    retq
> +  %tmp = getelementptr <8 x i8>, <8 x i8>* %arg, i32 0
> +  %tmp2 = load <8 x i8>, <8 x i8>* %tmp
> +  %tmp3 = extractelement <8 x i8> %tmp2, i32 0
> +  %tmp4 = zext i8 %tmp3 to i16
> +  %tmp5 = insertelement <8 x i16> undef, i16 %tmp4, i32 0
> +  %tmp6 = extractelement <8 x i8> %tmp2, i32 1
> +  %tmp7 = zext i8 %tmp6 to i16
> +  %tmp8 = insertelement <8 x i16> %tmp5, i16 %tmp7, i32 1
> +  %tmp9 = extractelement <8 x i8> %tmp2, i32 2
> +  %tmp10 = zext i8 %tmp9 to i16
> +  %tmp11 = insertelement <8 x i16> %tmp8, i16 %tmp10, i32 2
> +  %tmp12 = extractelement <8 x i8> %tmp2, i32 3
> +  %tmp13 = zext i8 %tmp12 to i16
> +  %tmp14 = insertelement <8 x i16> %tmp11, i16 %tmp13, i32 3
> +  %tmp15 = extractelement <8 x i8> %tmp2, i32 4
> +  %tmp16 = zext i8 %tmp15 to i16
> +  %tmp17 = insertelement <8 x i16> %tmp14, i16 %tmp16, i32 4
> +  %tmp18 = extractelement <8 x i8> %tmp2, i32 5
> +  %tmp19 = zext i8 %tmp18 to i16
> +  %tmp20 = insertelement <8 x i16> %tmp17, i16 %tmp19, i32 5
> +  %tmp21 = extractelement <8 x i8> %tmp2, i32 6
> +  %tmp22 = zext i8 %tmp21 to i16
> +  %tmp23 = insertelement <8 x i16> %tmp20, i16 %tmp22, i32 6
> +  %tmp24 = extractelement <8 x i8> %tmp2, i32 7
> +  %tmp25 = zext i8 %tmp24 to i16
> +  %tmp26 = insertelement <8 x i16> %tmp23, i16 %tmp25, i32 7
> +  %tmp27 = shl <8 x i16> %tmp26, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
> +  %tmp28 = getelementptr <8 x i16>, <8 x i16>* %arg1, i32 0
> +  store <8 x i16> %tmp27, <8 x i16>* %tmp28
> +  ret void
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits


More information about the llvm-commits mailing list