[llvm] r322644 - [X86] Don't mutate shuffle arguments after early-out for AVX512
Hans Wennborg via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 18 03:39:03 PST 2018
Merged to 6.0 in r322840.
On Wed, Jan 17, 2018 at 2:01 PM, Benjamin Kramer via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
> Author: d0k
> Date: Wed Jan 17 05:01:06 2018
> New Revision: 322644
>
> URL: http://llvm.org/viewvc/llvm-project?rev=322644&view=rev
> Log:
> [X86] Don't mutate shuffle arguments after early-out for AVX512
>
> The match* functions have the annoying behavior of modifying their inputs.
> Save and restore the inputs, just in case the early out for AVX512 is
> hit. This is still not great, and it's only a matter of time before this kind of
> bug happens again, but I couldn't come up with a better pattern without
> rewriting significant chunks of this code. Fixes PR35977.
>
> Modified:
> llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=322644&r1=322643&r2=322644&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Jan 17 05:01:06 2018
> @@ -28588,13 +28588,14 @@ static SDValue combineX86ShuffleChain(Ar
> }
> }
>
> + SDValue NewV1 = V1; // Save operand in case early exit happens.
> if (matchUnaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
> - V1, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
> - ShuffleVT) &&
> + NewV1, DL, DAG, Subtarget, Shuffle,
> + ShuffleSrcVT, ShuffleVT) &&
> (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
> if (Depth == 1 && Root.getOpcode() == Shuffle)
> return SDValue(); // Nothing to do!
> - Res = DAG.getBitcast(ShuffleSrcVT, V1);
> + Res = DAG.getBitcast(ShuffleSrcVT, NewV1);
> DCI.AddToWorklist(Res.getNode());
> Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res);
> DCI.AddToWorklist(Res.getNode());
> @@ -28616,33 +28617,36 @@ static SDValue combineX86ShuffleChain(Ar
> }
> }
>
> + SDValue NewV1 = V1; // Save operands in case early exit happens.
> + SDValue NewV2 = V2;
> if (matchBinaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
> - V1, V2, DL, DAG, Subtarget, Shuffle,
> + NewV1, NewV2, DL, DAG, Subtarget, Shuffle,
> ShuffleSrcVT, ShuffleVT, UnaryShuffle) &&
> (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
> if (Depth == 1 && Root.getOpcode() == Shuffle)
> return SDValue(); // Nothing to do!
> - V1 = DAG.getBitcast(ShuffleSrcVT, V1);
> - DCI.AddToWorklist(V1.getNode());
> - V2 = DAG.getBitcast(ShuffleSrcVT, V2);
> - DCI.AddToWorklist(V2.getNode());
> - Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2);
> + NewV1 = DAG.getBitcast(ShuffleSrcVT, NewV1);
> + DCI.AddToWorklist(NewV1.getNode());
> + NewV2 = DAG.getBitcast(ShuffleSrcVT, NewV2);
> + DCI.AddToWorklist(NewV2.getNode());
> + Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2);
> DCI.AddToWorklist(Res.getNode());
> return DAG.getBitcast(RootVT, Res);
> }
>
> - if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
> - AllowIntDomain, V1, V2, DL, DAG,
> - Subtarget, Shuffle, ShuffleVT,
> - PermuteImm) &&
> + NewV1 = V1; // Save operands in case early exit happens.
> + NewV2 = V2;
> + if (matchBinaryPermuteVectorShuffle(
> + MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, NewV1,
> + NewV2, DL, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm) &&
> (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
> if (Depth == 1 && Root.getOpcode() == Shuffle)
> return SDValue(); // Nothing to do!
> - V1 = DAG.getBitcast(ShuffleVT, V1);
> - DCI.AddToWorklist(V1.getNode());
> - V2 = DAG.getBitcast(ShuffleVT, V2);
> - DCI.AddToWorklist(V2.getNode());
> - Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2,
> + NewV1 = DAG.getBitcast(ShuffleVT, NewV1);
> + DCI.AddToWorklist(NewV1.getNode());
> + NewV2 = DAG.getBitcast(ShuffleVT, NewV2);
> + DCI.AddToWorklist(NewV2.getNode());
> + Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2,
> DAG.getConstant(PermuteImm, DL, MVT::i8));
> DCI.AddToWorklist(Res.getNode());
> return DAG.getBitcast(RootVT, Res);
>
> Modified: llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll?rev=322644&r1=322643&r2=322644&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll Wed Jan 17 05:01:06 2018
> @@ -4788,3 +4788,43 @@ define <2 x double> @test_masked_z_8xdou
> ret <2 x double> %res
> }
>
> +; PR35977
> +define void @test_zext_v8i8_to_v8i16(<8 x i8>* %arg, <8 x i16>* %arg1) {
> +; CHECK-LABEL: test_zext_v8i8_to_v8i16:
> +; CHECK: # %bb.0:
> +; CHECK-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
> +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
> +; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
> +; CHECK-NEXT: vmovdqa %xmm0, (%rsi)
> +; CHECK-NEXT: retq
> + %tmp = getelementptr <8 x i8>, <8 x i8>* %arg, i32 0
> + %tmp2 = load <8 x i8>, <8 x i8>* %tmp
> + %tmp3 = extractelement <8 x i8> %tmp2, i32 0
> + %tmp4 = zext i8 %tmp3 to i16
> + %tmp5 = insertelement <8 x i16> undef, i16 %tmp4, i32 0
> + %tmp6 = extractelement <8 x i8> %tmp2, i32 1
> + %tmp7 = zext i8 %tmp6 to i16
> + %tmp8 = insertelement <8 x i16> %tmp5, i16 %tmp7, i32 1
> + %tmp9 = extractelement <8 x i8> %tmp2, i32 2
> + %tmp10 = zext i8 %tmp9 to i16
> + %tmp11 = insertelement <8 x i16> %tmp8, i16 %tmp10, i32 2
> + %tmp12 = extractelement <8 x i8> %tmp2, i32 3
> + %tmp13 = zext i8 %tmp12 to i16
> + %tmp14 = insertelement <8 x i16> %tmp11, i16 %tmp13, i32 3
> + %tmp15 = extractelement <8 x i8> %tmp2, i32 4
> + %tmp16 = zext i8 %tmp15 to i16
> + %tmp17 = insertelement <8 x i16> %tmp14, i16 %tmp16, i32 4
> + %tmp18 = extractelement <8 x i8> %tmp2, i32 5
> + %tmp19 = zext i8 %tmp18 to i16
> + %tmp20 = insertelement <8 x i16> %tmp17, i16 %tmp19, i32 5
> + %tmp21 = extractelement <8 x i8> %tmp2, i32 6
> + %tmp22 = zext i8 %tmp21 to i16
> + %tmp23 = insertelement <8 x i16> %tmp20, i16 %tmp22, i32 6
> + %tmp24 = extractelement <8 x i8> %tmp2, i32 7
> + %tmp25 = zext i8 %tmp24 to i16
> + %tmp26 = insertelement <8 x i16> %tmp23, i16 %tmp25, i32 7
> + %tmp27 = shl <8 x i16> %tmp26, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
> + %tmp28 = getelementptr <8 x i16>, <8 x i16>* %arg1, i32 0
> + store <8 x i16> %tmp27, <8 x i16>* %tmp28
> + ret void
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list