[llvm] r231380 - [DagCombiner] Allow shuffles to merge through bitcasts

Sun Mar 29 21:41:43 PDT 2015

Hi Simon,

It turns out the bug is not in your commit; your commit merely exposes
an existing bug in the bitcast optimization code. I will submit a patch
to fix it soon.

Thanks,
-Jiangning

2015-03-30 10:43 GMT+08:00 Jiangning Liu <liujiangning1 at gmail.com>:

> Hi Simon,
>
> This commit caused a regression for a small test case, as described in
> ticket https://llvm.org/bugs/show_bug.cgi?id=23065 .
>
> Thanks,
> -Jiangning
>
>
> 2015-03-06 1:14 GMT+08:00 Simon Pilgrim <llvm-dev at redking.me.uk>:
>
>> Author: rksimon
>> Date: Thu Mar  5 11:14:04 2015
>> New Revision: 231380
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=231380&view=rev
>> Log:
>> [DagCombiner] Allow shuffles to merge through bitcasts
>>
>> Currently shuffles may only be combined if they are of the same type,
>> despite the fact that bitcasts are often introduced in between shuffle
>> nodes (e.g. x86 shuffle type widening).
>>
>> This patch allows a single input shuffle to peek through bitcasts and if
>> the input is another shuffle will merge them, shuffling using the smallest
>> sized type, and re-applying the bitcasts at the inputs and output instead.
>>
>> Dropped old ShuffleToZext test - this patch removes the use of the zext
>> and vector-zext.ll covers these anyhow.
>>
>> Differential Revision: http://reviews.llvm.org/D7939
>>
>> Removed:
>>     llvm/trunk/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll
>> Modified:
>>     llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>>     llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll
>>     llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll
>>     llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll
>>     llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
>>     llvm/trunk/test/CodeGen/X86/vector-shuffle-mmx.ll
>>
>> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=231380&r1=231379&r2=231380&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
>> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Thu Mar  5
>> 11:14:04 2015
>> @@ -11877,6 +11877,89 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE
>>        return V;
>>    }
>>
>> +  // If this shuffle only has a single input that is a bitcasted shuffle,
>> +  // attempt to merge the 2 shuffles and suitably bitcast the
>> inputs/output
>> +  // back to their original types.
>> +  if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
>> +      N1.getOpcode() == ISD::UNDEF && Level < AfterLegalizeVectorOps &&
>> +      TLI.isTypeLegal(VT)) {
>> +
>> +    // Peek through the bitcast only if there is one user.
>> +    SDValue BC0 = N0;
>> +    while (BC0.getOpcode() == ISD::BITCAST) {
>> +      if (!BC0.hasOneUse())
>> +        break;
>> +      BC0 = BC0.getOperand(0);
>> +    }
>> +
>> +    auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
>> +      if (Scale == 1)
>> +        return SmallVector<int, 8>(Mask.begin(), Mask.end());
>> +
>> +      SmallVector<int, 8> NewMask;
>> +      for (int M : Mask)
>> +        for (int s = 0; s != Scale; ++s)
>> +          NewMask.push_back(M < 0 ? -1 : Scale * M + s);
>> +      return NewMask;
>> +    };
>> +
>> +    if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
>> +      EVT SVT = VT.getScalarType();
>> +      EVT InnerVT = BC0->getValueType(0);
>> +      EVT InnerSVT = InnerVT.getScalarType();
>> +
>> +      // Determine which shuffle works with the smaller scalar type.
>> +      EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
>> +      EVT ScaleSVT = ScaleVT.getScalarType();
>> +
>> +      if (TLI.isTypeLegal(ScaleVT) &&
>> +          0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
>> +          0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
>> +
>> +        int InnerScale = InnerSVT.getSizeInBits() /
>> ScaleSVT.getSizeInBits();
>> +        int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
>> +
>> +        // Scale the shuffle masks to the smaller scalar type.
>> +        ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
>> +        SmallVector<int, 8> InnerMask =
>> +            ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
>> +        SmallVector<int, 8> OuterMask =
>> +            ScaleShuffleMask(SVN->getMask(), OuterScale);
>> +
>> +        // Merge the shuffle masks.
>> +        SmallVector<int, 8> NewMask;
>> +        for (int M : OuterMask)
>> +          NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
>> +
>> +        // Test for shuffle mask legality over both commutations.
>> +        SDValue SV0 = BC0->getOperand(0);
>> +        SDValue SV1 = BC0->getOperand(1);
>> +        bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
>> +        if (!LegalMask) {
>> +          for (int i = 0, e = (int)NewMask.size(); i != e; ++i) {
>> +            int idx = NewMask[i];
>> +            if (idx < 0)
>> +              continue;
>> +            else if (idx < e)
>> +              NewMask[i] = idx + e;
>> +            else
>> +              NewMask[i] = idx - e;
>> +          }
>> +          std::swap(SV0, SV1);
>> +          LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
>> +        }
>> +
>> +        if (LegalMask) {
>> +          SV0 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV0);
>> +          SV1 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV1);
>> +          return DAG.getNode(
>> +              ISD::BITCAST, SDLoc(N), VT,
>> +              DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1,
>> NewMask));
>> +        }
>> +      }
>> +    }
>> +  }
>> +
>>    // Canonicalize shuffles according to rules:
>>    //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
>>    //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
>>
>> Removed: llvm/trunk/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll?rev=231379&view=auto
>>
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll (removed)
>> @@ -1,14 +0,0 @@
>> -; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx -mtriple=x86_64-pc-win32
>> | FileCheck %s
>> -
>> -; CHECK: test
>> -; CHECK: vpmovzxwd
>> -; CHECK: vpmovzxwd
>> -define void @test(<4 x i64> %a, <4 x i16>* %buf) {
>> -  %ex1 = extractelement <4 x i64> %a, i32 0
>> -  %ex2 = extractelement <4 x i64> %a, i32 1
>> -  %x1 = bitcast i64 %ex1 to <4 x i16>
>> -  %x2 = bitcast i64 %ex2 to <4 x i16>
>> -  %Sh = shufflevector <4 x i16> %x1, <4 x i16> %x2, <4 x i32> <i32 0,
>> i32 1, i32 4, i32 5>
>> -  store <4 x i16> %Sh, <4 x i16>* %buf, align 1
>> -  ret void
>> -}
>>
>> Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll?rev=231380&r1=231379&r2=231380&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll Thu Mar  5
>> 11:14:04 2015
>> @@ -1336,3 +1336,22 @@ define <16 x i8> @shuffle_v16i8_uu_02_03
>>    %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16
>> x i32> <i32 undef, i32 2, i32 3, i32 16, i32 undef, i32 6, i32 7, i32 16,
>> i32 undef, i32 10, i32 11, i32 16, i32 undef, i32 14, i32 15, i32 16>
>>    ret <16 x i8> %shuffle
>>  }
>> +
>> +define <16 x i8> @shuffle_v16i8_bitcast_unpack(<16 x i8> %a, <16 x i8>
>> %b) {
>> +; SSE-LABEL: shuffle_v16i8_bitcast_unpack:
>> +; SSE:       # BB#0:
>> +; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
>> +; SSE-NEXT:    retq
>> +;
>> +; AVX-LABEL: shuffle_v16i8_bitcast_unpack:
>> +; AVX:       # BB#0:
>> +; AVX-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
>> +; AVX-NEXT:    retq
>> +  %shuffle8  = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32
>> 7, i32 23, i32 6, i32 22, i32 5, i32 21, i32 4, i32 20, i32 3, i32 19, i32
>> 2, i32 18, i32 1, i32 17, i32 0, i32 16>
>> +  %bitcast32 = bitcast <16 x i8> %shuffle8 to <4 x float>
>> +  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef,
>> <4 x i32> <i32 3, i32 2, i32 1, i32 0>
>> +  %bitcast16 = bitcast <4 x float> %shuffle32 to <8 x i16>
>> +  %shuffle16 = shufflevector <8 x i16> %bitcast16, <8 x i16> undef, <8 x
>> i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
>> +  %bitcast8  = bitcast <8 x i16> %shuffle16 to <16 x i8>
>> +  ret <16 x i8> %bitcast8
>> +}
>>
>> Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll?rev=231380&r1=231379&r2=231380&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll Thu Mar  5
>> 11:14:04 2015
>> @@ -810,6 +810,25 @@ define <2 x double> @shuffle_v2f64_z1(<2
>>    ret <2 x double> %shuffle
>>  }
>>
>> +define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
>> +; SSE-LABEL: shuffle_v2f64_bitcast_1z:
>> +; SSE:       # BB#0:
>> +; SSE-NEXT:    xorpd %xmm1, %xmm1
>> +; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
>> +; SSE-NEXT:    retq
>> +;
>> +; AVX-LABEL: shuffle_v2f64_bitcast_1z:
>> +; AVX:       # BB#0:
>> +; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
>> +; AVX-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
>> +; AVX-NEXT:    retq
>> +  %shuffle64 = shufflevector <2 x double> %a, <2 x double>
>> zeroinitializer, <2 x i32> <i32 2, i32 1>
>> +  %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
>> +  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef,
>> <4 x i32> <i32 2, i32 3, i32 0, i32 1>
>> +  %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
>> +  ret <2 x double> %bitcast64
>> +}
>> +
>>  define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
>>  ; SSE-LABEL: insert_reg_and_zero_v2i64:
>>  ; SSE:       # BB#0:
>>
>> Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll?rev=231380&r1=231379&r2=231380&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll Thu Mar  5
>> 11:14:04 2015
>> @@ -1574,6 +1574,23 @@ define <4 x i32> @shuffle_v4i32_0zz3(<4
>>    ret <4 x i32> %shuffle
>>  }
>>
>> +define <4 x i32> @shuffle_v4i32_bitcast_0415(<4 x i32> %a, <4 x i32> %b)
>> {
>> +; SSE-LABEL: shuffle_v4i32_bitcast_0415:
>> +; SSE:       # BB#0:
>> +; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
>> +; SSE-NEXT:    retq
>> +;
>> +; AVX-LABEL: shuffle_v4i32_bitcast_0415:
>> +; AVX:       # BB#0:
>> +; AVX-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
>> +; AVX-NEXT:    retq
>> +  %shuffle32 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32
>> 1, i32 5, i32 0, i32 4>
>> +  %bitcast64 = bitcast <4 x i32> %shuffle32 to <2 x double>
>> +  %shuffle64 = shufflevector <2 x double> %bitcast64, <2 x double>
>> undef, <2 x i32> <i32 1, i32 0>
>> +  %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x i32>
>> +  ret <4 x i32> %bitcast32
>> +}
>> +
>>  define <4 x i32> @insert_reg_and_zero_v4i32(i32 %a) {
>>  ; SSE-LABEL: insert_reg_and_zero_v4i32:
>>  ; SSE:       # BB#0:
>>
>> Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll?rev=231380&r1=231379&r2=231380&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll Thu Mar  5
>> 11:14:04 2015
>> @@ -922,3 +922,22 @@ define <4 x double> @splat_v4f64(<2 x do
>>    %1 = shufflevector <2 x double> %r, <2 x double> undef, <4 x i32>
>> zeroinitializer
>>    ret <4 x double> %1
>>  }
>> +
>> +define <4 x double> @bitcast_v4f64_0426(<4 x double> %a, <4 x double>
>> %b) {
>> +; AVX1-LABEL: bitcast_v4f64_0426:
>> +; AVX1:       # BB#0:
>> +; AVX1-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
>> +; AVX1-NEXT:    retq
>> +;
>> +; AVX2-LABEL: bitcast_v4f64_0426:
>> +; AVX2:       # BB#0:
>> +; AVX2-NEXT:    vpunpcklqdq  {{.*#+}} ymm0 =
>> ymm0[0],ymm1[0],ymm0[2],ymm1[2]
>> +; AVX2-NEXT:    retq
>> +  %shuffle64 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32>
>> <i32 4, i32 0, i32 6, i32 2>
>> +  %bitcast32 = bitcast <4 x double> %shuffle64 to <8 x float>
>> +  %shuffle32 = shufflevector <8 x float> %bitcast32, <8 x float> undef,
>> <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
>> +  %bitcast16 = bitcast <8 x float> %shuffle32 to <16 x i16>
>> +  %shuffle16 = shufflevector <16 x i16> %bitcast16, <16 x i16> undef,
>> <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10,
>> i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
>> +  %bitcast64 = bitcast <16 x i16> %shuffle16 to <4 x double>
>> +  ret <4 x double> %bitcast64
>> +}
>>
>> Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-mmx.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-mmx.ll?rev=231380&r1=231379&r2=231380&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/vector-shuffle-mmx.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-mmx.ll Thu Mar  5 11:14:04
>> 2015
>> @@ -8,14 +8,14 @@ define void @test0(<1 x i64>* %x) {
>>  ; X32:       ## BB#0: ## %entry
>>  ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
>>  ; X32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
>> -; X32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,3]
>> +; X32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
>>  ; X32-NEXT:    movlpd %xmm0, (%eax)
>>  ; X32-NEXT:    retl
>>  ;
>>  ; X64-LABEL: test0:
>>  ; X64:       ## BB#0: ## %entry
>>  ; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
>> -; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,3]
>> +; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
>>  ; X64-NEXT:    movq %xmm0, (%rdi)
>>  ; X64-NEXT:    retq
>>  entry:
>> @@ -84,16 +84,15 @@ define void @test2() nounwind {
>>  ; X32:       ## BB#0: ## %entry
>>  ; X32-NEXT:    movl L_tmp_V2i$non_lazy_ptr, %eax
>>  ; X32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
>> -; X32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
>> -; X32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
>> -; X32-NEXT:    movlpd %xmm0, (%eax)
>> +; X32-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
>> +; X32-NEXT:    movlps %xmm0, (%eax)
>>  ; X32-NEXT:    retl
>>  ;
>>  ; X64-LABEL: test2:
>>  ; X64:       ## BB#0: ## %entry
>>  ; X64-NEXT:    movq _tmp_V2i@{{.*}}(%rip), %rax
>>  ; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
>> -; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
>> +; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
>>  ; X64-NEXT:    movq %xmm0, (%rax)
>>  ; X64-NEXT:    retq
>>  entry:
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150330/d8a127d9/attachment.html>