[llvm] r322644 - [X86] Don't mutate shuffle arguments after early-out for AVX512
Benjamin Kramer via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 17 05:01:06 PST 2018
Author: d0k
Date: Wed Jan 17 05:01:06 2018
New Revision: 322644
URL: http://llvm.org/viewvc/llvm-project?rev=322644&view=rev
Log:
[X86] Don't mutate shuffle arguments after early-out for AVX512
The match* functions have the annoying behavior of modifying their inputs.
Save and restore the inputs, just in case the early out for AVX512 is
hit. This is still not great, and it's only a matter of time before this kind
of bug happens again, but I couldn't come up with a better pattern without
rewriting significant chunks of this code. Fixes PR35977.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=322644&r1=322643&r2=322644&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Jan 17 05:01:06 2018
@@ -28588,13 +28588,14 @@ static SDValue combineX86ShuffleChain(Ar
}
}
+ SDValue NewV1 = V1; // Save operand in case early exit happens.
if (matchUnaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
- V1, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
- ShuffleVT) &&
+ NewV1, DL, DAG, Subtarget, Shuffle,
+ ShuffleSrcVT, ShuffleVT) &&
(!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
- Res = DAG.getBitcast(ShuffleSrcVT, V1);
+ Res = DAG.getBitcast(ShuffleSrcVT, NewV1);
DCI.AddToWorklist(Res.getNode());
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res);
DCI.AddToWorklist(Res.getNode());
@@ -28616,33 +28617,36 @@ static SDValue combineX86ShuffleChain(Ar
}
}
+ SDValue NewV1 = V1; // Save operands in case early exit happens.
+ SDValue NewV2 = V2;
if (matchBinaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
- V1, V2, DL, DAG, Subtarget, Shuffle,
+ NewV1, NewV2, DL, DAG, Subtarget, Shuffle,
ShuffleSrcVT, ShuffleVT, UnaryShuffle) &&
(!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
- V1 = DAG.getBitcast(ShuffleSrcVT, V1);
- DCI.AddToWorklist(V1.getNode());
- V2 = DAG.getBitcast(ShuffleSrcVT, V2);
- DCI.AddToWorklist(V2.getNode());
- Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2);
+ NewV1 = DAG.getBitcast(ShuffleSrcVT, NewV1);
+ DCI.AddToWorklist(NewV1.getNode());
+ NewV2 = DAG.getBitcast(ShuffleSrcVT, NewV2);
+ DCI.AddToWorklist(NewV2.getNode());
+ Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2);
DCI.AddToWorklist(Res.getNode());
return DAG.getBitcast(RootVT, Res);
}
- if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
- AllowIntDomain, V1, V2, DL, DAG,
- Subtarget, Shuffle, ShuffleVT,
- PermuteImm) &&
+ NewV1 = V1; // Save operands in case early exit happens.
+ NewV2 = V2;
+ if (matchBinaryPermuteVectorShuffle(
+ MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, NewV1,
+ NewV2, DL, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm) &&
(!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
- V1 = DAG.getBitcast(ShuffleVT, V1);
- DCI.AddToWorklist(V1.getNode());
- V2 = DAG.getBitcast(ShuffleVT, V2);
- DCI.AddToWorklist(V2.getNode());
- Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2,
+ NewV1 = DAG.getBitcast(ShuffleVT, NewV1);
+ DCI.AddToWorklist(NewV1.getNode());
+ NewV2 = DAG.getBitcast(ShuffleVT, NewV2);
+ DCI.AddToWorklist(NewV2.getNode());
+ Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2,
DAG.getConstant(PermuteImm, DL, MVT::i8));
DCI.AddToWorklist(Res.getNode());
return DAG.getBitcast(RootVT, Res);
Modified: llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll?rev=322644&r1=322643&r2=322644&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll Wed Jan 17 05:01:06 2018
@@ -4788,3 +4788,43 @@ define <2 x double> @test_masked_z_8xdou
ret <2 x double> %res
}
+; PR35977
+define void @test_zext_v8i8_to_v8i16(<8 x i8>* %arg, <8 x i16>* %arg1) {
+; CHECK-LABEL: test_zext_v8i8_to_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; CHECK-NEXT: vmovdqa %xmm0, (%rsi)
+; CHECK-NEXT: retq
+ %tmp = getelementptr <8 x i8>, <8 x i8>* %arg, i32 0
+ %tmp2 = load <8 x i8>, <8 x i8>* %tmp
+ %tmp3 = extractelement <8 x i8> %tmp2, i32 0
+ %tmp4 = zext i8 %tmp3 to i16
+ %tmp5 = insertelement <8 x i16> undef, i16 %tmp4, i32 0
+ %tmp6 = extractelement <8 x i8> %tmp2, i32 1
+ %tmp7 = zext i8 %tmp6 to i16
+ %tmp8 = insertelement <8 x i16> %tmp5, i16 %tmp7, i32 1
+ %tmp9 = extractelement <8 x i8> %tmp2, i32 2
+ %tmp10 = zext i8 %tmp9 to i16
+ %tmp11 = insertelement <8 x i16> %tmp8, i16 %tmp10, i32 2
+ %tmp12 = extractelement <8 x i8> %tmp2, i32 3
+ %tmp13 = zext i8 %tmp12 to i16
+ %tmp14 = insertelement <8 x i16> %tmp11, i16 %tmp13, i32 3
+ %tmp15 = extractelement <8 x i8> %tmp2, i32 4
+ %tmp16 = zext i8 %tmp15 to i16
+ %tmp17 = insertelement <8 x i16> %tmp14, i16 %tmp16, i32 4
+ %tmp18 = extractelement <8 x i8> %tmp2, i32 5
+ %tmp19 = zext i8 %tmp18 to i16
+ %tmp20 = insertelement <8 x i16> %tmp17, i16 %tmp19, i32 5
+ %tmp21 = extractelement <8 x i8> %tmp2, i32 6
+ %tmp22 = zext i8 %tmp21 to i16
+ %tmp23 = insertelement <8 x i16> %tmp20, i16 %tmp22, i32 6
+ %tmp24 = extractelement <8 x i8> %tmp2, i32 7
+ %tmp25 = zext i8 %tmp24 to i16
+ %tmp26 = insertelement <8 x i16> %tmp23, i16 %tmp25, i32 7
+ %tmp27 = shl <8 x i16> %tmp26, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %tmp28 = getelementptr <8 x i16>, <8 x i16>* %arg1, i32 0
+ store <8 x i16> %tmp27, <8 x i16>* %tmp28
+ ret void
+}
More information about the llvm-commits
mailing list