[llvm] r231380 - [DagCombiner] Allow shuffles to merge through bitcasts
Jiangning Liu
liujiangning1 at gmail.com
Sun Mar 29 21:41:43 PDT 2015
Hi Simon,
It turns out the bug is not in your commit; your commit merely exposes
an existing bug in the code that optimizes bitcasts. I will submit a patch
to fix it soon.
Thanks,
-Jiangning
2015-03-30 10:43 GMT+08:00 Jiangning Liu <liujiangning1 at gmail.com>:
> Hi Simon,
>
> This commit caused a regression for a small test case, as described in ticket
> https://llvm.org/bugs/show_bug.cgi?id=23065 .
>
> Thanks,
> -Jiangning
>
>
> 2015-03-06 1:14 GMT+08:00 Simon Pilgrim <llvm-dev at redking.me.uk>:
>
>> Author: rksimon
>> Date: Thu Mar 5 11:14:04 2015
>> New Revision: 231380
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=231380&view=rev
>> Log:
>> [DagCombiner] Allow shuffles to merge through bitcasts
>>
>> Currently shuffles may only be combined if they are of the same type,
>> despite the fact that bitcasts are often introduced in between shuffle
>> nodes (e.g. x86 shuffle type widening).
>>
>> This patch allows a single input shuffle to peek through bitcasts and if
>> the input is another shuffle will merge them, shuffling using the smallest
>> sized type, and re-applying the bitcasts at the inputs and output instead.
>>
>> Dropped old ShuffleToZext test - this patch removes the use of the zext
>> and vector-zext.ll covers these anyhow.
>>
>> Differential Revision: http://reviews.llvm.org/D7939
>>
>> Removed:
>> llvm/trunk/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll
>> Modified:
>> llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>> llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll
>> llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll
>> llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll
>> llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
>> llvm/trunk/test/CodeGen/X86/vector-shuffle-mmx.ll
>>
>> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=231380&r1=231379&r2=231380&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
>> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Thu Mar 5
>> 11:14:04 2015
>> @@ -11877,6 +11877,89 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE
>> return V;
>> }
>>
>> + // If this shuffle only has a single input that is a bitcasted shuffle,
>> + // attempt to merge the 2 shuffles and suitably bitcast the
>> inputs/output
>> + // back to their original types.
>> + if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
>> + N1.getOpcode() == ISD::UNDEF && Level < AfterLegalizeVectorOps &&
>> + TLI.isTypeLegal(VT)) {
>> +
>> + // Peek through the bitcast only if there is one user.
>> + SDValue BC0 = N0;
>> + while (BC0.getOpcode() == ISD::BITCAST) {
>> + if (!BC0.hasOneUse())
>> + break;
>> + BC0 = BC0.getOperand(0);
>> + }
>> +
>> + auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
>> + if (Scale == 1)
>> + return SmallVector<int, 8>(Mask.begin(), Mask.end());
>> +
>> + SmallVector<int, 8> NewMask;
>> + for (int M : Mask)
>> + for (int s = 0; s != Scale; ++s)
>> + NewMask.push_back(M < 0 ? -1 : Scale * M + s);
>> + return NewMask;
>> + };
>> +
>> + if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
>> + EVT SVT = VT.getScalarType();
>> + EVT InnerVT = BC0->getValueType(0);
>> + EVT InnerSVT = InnerVT.getScalarType();
>> +
>> + // Determine which shuffle works with the smaller scalar type.
>> + EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
>> + EVT ScaleSVT = ScaleVT.getScalarType();
>> +
>> + if (TLI.isTypeLegal(ScaleVT) &&
>> + 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
>> + 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
>> +
>> + int InnerScale = InnerSVT.getSizeInBits() /
>> ScaleSVT.getSizeInBits();
>> + int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
>> +
>> + // Scale the shuffle masks to the smaller scalar type.
>> + ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
>> + SmallVector<int, 8> InnerMask =
>> + ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
>> + SmallVector<int, 8> OuterMask =
>> + ScaleShuffleMask(SVN->getMask(), OuterScale);
>> +
>> + // Merge the shuffle masks.
>> + SmallVector<int, 8> NewMask;
>> + for (int M : OuterMask)
>> + NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
>> +
>> + // Test for shuffle mask legality over both commutations.
>> + SDValue SV0 = BC0->getOperand(0);
>> + SDValue SV1 = BC0->getOperand(1);
>> + bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
>> + if (!LegalMask) {
>> + for (int i = 0, e = (int)NewMask.size(); i != e; ++i) {
>> + int idx = NewMask[i];
>> + if (idx < 0)
>> + continue;
>> + else if (idx < e)
>> + NewMask[i] = idx + e;
>> + else
>> + NewMask[i] = idx - e;
>> + }
>> + std::swap(SV0, SV1);
>> + LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
>> + }
>> +
>> + if (LegalMask) {
>> + SV0 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV0);
>> + SV1 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV1);
>> + return DAG.getNode(
>> + ISD::BITCAST, SDLoc(N), VT,
>> + DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1,
>> NewMask));
>> + }
>> + }
>> + }
>> + }
>> +
>> // Canonicalize shuffles according to rules:
>> // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
>> // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
>>
>> Removed: llvm/trunk/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll?rev=231379&view=auto
>>
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll (removed)
>> @@ -1,14 +0,0 @@
>> -; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx -mtriple=x86_64-pc-win32
>> | FileCheck %s
>> -
>> -; CHECK: test
>> -; CHECK: vpmovzxwd
>> -; CHECK: vpmovzxwd
>> -define void @test(<4 x i64> %a, <4 x i16>* %buf) {
>> - %ex1 = extractelement <4 x i64> %a, i32 0
>> - %ex2 = extractelement <4 x i64> %a, i32 1
>> - %x1 = bitcast i64 %ex1 to <4 x i16>
>> - %x2 = bitcast i64 %ex2 to <4 x i16>
>> - %Sh = shufflevector <4 x i16> %x1, <4 x i16> %x2, <4 x i32> <i32 0,
>> i32 1, i32 4, i32 5>
>> - store <4 x i16> %Sh, <4 x i16>* %buf, align 1
>> - ret void
>> -}
>>
>> Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll?rev=231380&r1=231379&r2=231380&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll Thu Mar 5
>> 11:14:04 2015
>> @@ -1336,3 +1336,22 @@ define <16 x i8> @shuffle_v16i8_uu_02_03
>> %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16
>> x i32> <i32 undef, i32 2, i32 3, i32 16, i32 undef, i32 6, i32 7, i32 16,
>> i32 undef, i32 10, i32 11, i32 16, i32 undef, i32 14, i32 15, i32 16>
>> ret <16 x i8> %shuffle
>> }
>> +
>> +define <16 x i8> @shuffle_v16i8_bitcast_unpack(<16 x i8> %a, <16 x i8>
>> %b) {
>> +; SSE-LABEL: shuffle_v16i8_bitcast_unpack:
>> +; SSE: # BB#0:
>> +; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
>> +; SSE-NEXT: retq
>> +;
>> +; AVX-LABEL: shuffle_v16i8_bitcast_unpack:
>> +; AVX: # BB#0:
>> +; AVX-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
>> +; AVX-NEXT: retq
>> + %shuffle8 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32
>> 7, i32 23, i32 6, i32 22, i32 5, i32 21, i32 4, i32 20, i32 3, i32 19, i32
>> 2, i32 18, i32 1, i32 17, i32 0, i32 16>
>> + %bitcast32 = bitcast <16 x i8> %shuffle8 to <4 x float>
>> + %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef,
>> <4 x i32> <i32 3, i32 2, i32 1, i32 0>
>> + %bitcast16 = bitcast <4 x float> %shuffle32 to <8 x i16>
>> + %shuffle16 = shufflevector <8 x i16> %bitcast16, <8 x i16> undef, <8 x
>> i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
>> + %bitcast8 = bitcast <8 x i16> %shuffle16 to <16 x i8>
>> + ret <16 x i8> %bitcast8
>> +}
>>
>> Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll?rev=231380&r1=231379&r2=231380&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll Thu Mar 5
>> 11:14:04 2015
>> @@ -810,6 +810,25 @@ define <2 x double> @shuffle_v2f64_z1(<2
>> ret <2 x double> %shuffle
>> }
>>
>> +define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
>> +; SSE-LABEL: shuffle_v2f64_bitcast_1z:
>> +; SSE: # BB#0:
>> +; SSE-NEXT: xorpd %xmm1, %xmm1
>> +; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
>> +; SSE-NEXT: retq
>> +;
>> +; AVX-LABEL: shuffle_v2f64_bitcast_1z:
>> +; AVX: # BB#0:
>> +; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
>> +; AVX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
>> +; AVX-NEXT: retq
>> + %shuffle64 = shufflevector <2 x double> %a, <2 x double>
>> zeroinitializer, <2 x i32> <i32 2, i32 1>
>> + %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
>> + %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef,
>> <4 x i32> <i32 2, i32 3, i32 0, i32 1>
>> + %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
>> + ret <2 x double> %bitcast64
>> +}
>> +
>> define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
>> ; SSE-LABEL: insert_reg_and_zero_v2i64:
>> ; SSE: # BB#0:
>>
>> Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll?rev=231380&r1=231379&r2=231380&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll Thu Mar 5
>> 11:14:04 2015
>> @@ -1574,6 +1574,23 @@ define <4 x i32> @shuffle_v4i32_0zz3(<4
>> ret <4 x i32> %shuffle
>> }
>>
>> +define <4 x i32> @shuffle_v4i32_bitcast_0415(<4 x i32> %a, <4 x i32> %b)
>> {
>> +; SSE-LABEL: shuffle_v4i32_bitcast_0415:
>> +; SSE: # BB#0:
>> +; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
>> +; SSE-NEXT: retq
>> +;
>> +; AVX-LABEL: shuffle_v4i32_bitcast_0415:
>> +; AVX: # BB#0:
>> +; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
>> +; AVX-NEXT: retq
>> + %shuffle32 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32
>> 1, i32 5, i32 0, i32 4>
>> + %bitcast64 = bitcast <4 x i32> %shuffle32 to <2 x double>
>> + %shuffle64 = shufflevector <2 x double> %bitcast64, <2 x double>
>> undef, <2 x i32> <i32 1, i32 0>
>> + %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x i32>
>> + ret <4 x i32> %bitcast32
>> +}
>> +
>> define <4 x i32> @insert_reg_and_zero_v4i32(i32 %a) {
>> ; SSE-LABEL: insert_reg_and_zero_v4i32:
>> ; SSE: # BB#0:
>>
>> Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll?rev=231380&r1=231379&r2=231380&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll Thu Mar 5
>> 11:14:04 2015
>> @@ -922,3 +922,22 @@ define <4 x double> @splat_v4f64(<2 x do
>> %1 = shufflevector <2 x double> %r, <2 x double> undef, <4 x i32>
>> zeroinitializer
>> ret <4 x double> %1
>> }
>> +
>> +define <4 x double> @bitcast_v4f64_0426(<4 x double> %a, <4 x double>
>> %b) {
>> +; AVX1-LABEL: bitcast_v4f64_0426:
>> +; AVX1: # BB#0:
>> +; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
>> +; AVX1-NEXT: retq
>> +;
>> +; AVX2-LABEL: bitcast_v4f64_0426:
>> +; AVX2: # BB#0:
>> +; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm0 =
>> ymm0[0],ymm1[0],ymm0[2],ymm1[2]
>> +; AVX2-NEXT: retq
>> + %shuffle64 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32>
>> <i32 4, i32 0, i32 6, i32 2>
>> + %bitcast32 = bitcast <4 x double> %shuffle64 to <8 x float>
>> + %shuffle32 = shufflevector <8 x float> %bitcast32, <8 x float> undef,
>> <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
>> + %bitcast16 = bitcast <8 x float> %shuffle32 to <16 x i16>
>> + %shuffle16 = shufflevector <16 x i16> %bitcast16, <16 x i16> undef,
>> <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10,
>> i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
>> + %bitcast64 = bitcast <16 x i16> %shuffle16 to <4 x double>
>> + ret <4 x double> %bitcast64
>> +}
>>
>> Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-mmx.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-mmx.ll?rev=231380&r1=231379&r2=231380&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/vector-shuffle-mmx.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-mmx.ll Thu Mar 5 11:14:04
>> 2015
>> @@ -8,14 +8,14 @@ define void @test0(<1 x i64>* %x) {
>> ; X32: ## BB#0: ## %entry
>> ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
>> ; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
>> -; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,3]
>> +; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
>> ; X32-NEXT: movlpd %xmm0, (%eax)
>> ; X32-NEXT: retl
>> ;
>> ; X64-LABEL: test0:
>> ; X64: ## BB#0: ## %entry
>> ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
>> -; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,3]
>> +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
>> ; X64-NEXT: movq %xmm0, (%rdi)
>> ; X64-NEXT: retq
>> entry:
>> @@ -84,16 +84,15 @@ define void @test2() nounwind {
>> ; X32: ## BB#0: ## %entry
>> ; X32-NEXT: movl L_tmp_V2i$non_lazy_ptr, %eax
>> ; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
>> -; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
>> -; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
>> -; X32-NEXT: movlpd %xmm0, (%eax)
>> +; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
>> +; X32-NEXT: movlps %xmm0, (%eax)
>> ; X32-NEXT: retl
>> ;
>> ; X64-LABEL: test2:
>> ; X64: ## BB#0: ## %entry
>> ; X64-NEXT: movq _tmp_V2i@{{.*}}(%rip), %rax
>> ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
>> -; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
>> +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
>> ; X64-NEXT: movq %xmm0, (%rax)
>> ; X64-NEXT: retq
>> entry:
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150330/d8a127d9/attachment.html>
More information about the llvm-commits
mailing list