[llvm] r231380 - [DagCombiner] Allow shuffles to merge through bitcasts

Sun Mar 29 19:43:58 PDT 2015

Hi Simon,

This commit caused a regression for a small test case, as described in ticket
https://llvm.org/bugs/show_bug.cgi?id=23065 .

Thanks,
-Jiangning

2015-03-06 1:14 GMT+08:00 Simon Pilgrim <llvm-dev at redking.me.uk>:

> Author: rksimon
> Date: Thu Mar  5 11:14:04 2015
> New Revision: 231380
>
> URL: http://llvm.org/viewvc/llvm-project?rev=231380&view=rev
> Log:
> [DagCombiner] Allow shuffles to merge through bitcasts
>
> Currently shuffles may only be combined if they are of the same type,
> despite the fact that bitcasts are often introduced in between shuffle
> nodes (e.g. x86 shuffle type widening).
>
> This patch allows a single input shuffle to peek through bitcasts and if
> the input is another shuffle will merge them, shuffling using the smallest
> sized type, and re-applying the bitcasts at the inputs and output instead.
>
> Dropped old ShuffleToZext test - this patch removes the use of the zext
> and vector-zext.ll covers these anyhow.
>
> Differential Revision: http://reviews.llvm.org/D7939
>
> Removed:
>     llvm/trunk/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll
> Modified:
>     llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>     llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll
>     llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll
>     llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll
>     llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
>     llvm/trunk/test/CodeGen/X86/vector-shuffle-mmx.ll
>
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=231380&r1=231379&r2=231380&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Thu Mar  5
> 11:14:04 2015
> @@ -11877,6 +11877,89 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE
>        return V;
>    }
>
> +  // If this shuffle only has a single input that is a bitcasted shuffle,
> +  // attempt to merge the 2 shuffles and suitably bitcast the
> inputs/output
> +  // back to their original types.
> +  if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
> +      N1.getOpcode() == ISD::UNDEF && Level < AfterLegalizeVectorOps &&
> +      TLI.isTypeLegal(VT)) {
> +
> +    // Peek through the bitcast only if there is one user.
> +    SDValue BC0 = N0;
> +    while (BC0.getOpcode() == ISD::BITCAST) {
> +      if (!BC0.hasOneUse())
> +        break;
> +      BC0 = BC0.getOperand(0);
> +    }
> +
> +    auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
> +      if (Scale == 1)
> +        return SmallVector<int, 8>(Mask.begin(), Mask.end());
> +
> +      SmallVector<int, 8> NewMask;
> +      for (int M : Mask)
> +        for (int s = 0; s != Scale; ++s)
> +          NewMask.push_back(M < 0 ? -1 : Scale * M + s);
> +      return NewMask;
> +    };
> +
> +    if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
> +      EVT SVT = VT.getScalarType();
> +      EVT InnerVT = BC0->getValueType(0);
> +      EVT InnerSVT = InnerVT.getScalarType();
> +
> +      // Determine which shuffle works with the smaller scalar type.
> +      EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
> +      EVT ScaleSVT = ScaleVT.getScalarType();
> +
> +      if (TLI.isTypeLegal(ScaleVT) &&
> +          0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
> +          0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
> +
> +        int InnerScale = InnerSVT.getSizeInBits() /
> ScaleSVT.getSizeInBits();
> +        int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
> +
> +        // Scale the shuffle masks to the smaller scalar type.
> +        ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
> +        SmallVector<int, 8> InnerMask =
> +            ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
> +        SmallVector<int, 8> OuterMask =
> +            ScaleShuffleMask(SVN->getMask(), OuterScale);
> +
> +        // Merge the shuffle masks.
> +        SmallVector<int, 8> NewMask;
> +        for (int M : OuterMask)
> +          NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
> +
> +        // Test for shuffle mask legality over both commutations.
> +        SDValue SV0 = BC0->getOperand(0);
> +        SDValue SV1 = BC0->getOperand(1);
> +        bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
> +        if (!LegalMask) {
> +          for (int i = 0, e = (int)NewMask.size(); i != e; ++i) {
> +            int idx = NewMask[i];
> +            if (idx < 0)
> +              continue;
> +            else if (idx < e)
> +              NewMask[i] = idx + e;
> +            else
> +              NewMask[i] = idx - e;
> +          }
> +          std::swap(SV0, SV1);
> +          LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
> +        }
> +
> +        if (LegalMask) {
> +          SV0 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV0);
> +          SV1 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV1);
> +          return DAG.getNode(
> +              ISD::BITCAST, SDLoc(N), VT,
> +              DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
> +        }
> +      }
> +    }
> +  }
> +
>    // Canonicalize shuffles according to rules:
>    //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
>    //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
>
> Removed: llvm/trunk/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll?rev=231379&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll (removed)
> @@ -1,14 +0,0 @@
> -; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx -mtriple=x86_64-pc-win32 |
> FileCheck %s
> -
> -; CHECK: test
> -; CHECK: vpmovzxwd
> -; CHECK: vpmovzxwd
> -define void @test(<4 x i64> %a, <4 x i16>* %buf) {
> -  %ex1 = extractelement <4 x i64> %a, i32 0
> -  %ex2 = extractelement <4 x i64> %a, i32 1
> -  %x1 = bitcast i64 %ex1 to <4 x i16>
> -  %x2 = bitcast i64 %ex2 to <4 x i16>
> -  %Sh = shufflevector <4 x i16> %x1, <4 x i16> %x2, <4 x i32> <i32 0, i32
> 1, i32 4, i32 5>
> -  store <4 x i16> %Sh, <4 x i16>* %buf, align 1
> -  ret void
> -}
>
> Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll?rev=231380&r1=231379&r2=231380&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll Thu Mar  5
> 11:14:04 2015
> @@ -1336,3 +1336,22 @@ define <16 x i8> @shuffle_v16i8_uu_02_03
>    %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x
> i32> <i32 undef, i32 2, i32 3, i32 16, i32 undef, i32 6, i32 7, i32 16, i32
> undef, i32 10, i32 11, i32 16, i32 undef, i32 14, i32 15, i32 16>
>    ret <16 x i8> %shuffle
>  }
> +
> +define <16 x i8> @shuffle_v16i8_bitcast_unpack(<16 x i8> %a, <16 x i8>
> %b) {
> +; SSE-LABEL: shuffle_v16i8_bitcast_unpack:
> +; SSE:       # BB#0:
> +; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
> +; SSE-NEXT:    retq
> +;
> +; AVX-LABEL: shuffle_v16i8_bitcast_unpack:
> +; AVX:       # BB#0:
> +; AVX-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
> +; AVX-NEXT:    retq
> +  %shuffle8  = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32
> 7, i32 23, i32 6, i32 22, i32 5, i32 21, i32 4, i32 20, i32 3, i32 19, i32
> 2, i32 18, i32 1, i32 17, i32 0, i32 16>
> +  %bitcast32 = bitcast <16 x i8> %shuffle8 to <4 x float>
> +  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef,
> <4 x i32> <i32 3, i32 2, i32 1, i32 0>
> +  %bitcast16 = bitcast <4 x float> %shuffle32 to <8 x i16>
> +  %shuffle16 = shufflevector <8 x i16> %bitcast16, <8 x i16> undef, <8 x
> i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
> +  %bitcast8  = bitcast <8 x i16> %shuffle16 to <16 x i8>
> +  ret <16 x i8> %bitcast8
> +}
>
> Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll?rev=231380&r1=231379&r2=231380&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll Thu Mar  5
> 11:14:04 2015
> @@ -810,6 +810,25 @@ define <2 x double> @shuffle_v2f64_z1(<2
>    ret <2 x double> %shuffle
>  }
>
> +define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
> +; SSE-LABEL: shuffle_v2f64_bitcast_1z:
> +; SSE:       # BB#0:
> +; SSE-NEXT:    xorpd %xmm1, %xmm1
> +; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
> +; SSE-NEXT:    retq
> +;
> +; AVX-LABEL: shuffle_v2f64_bitcast_1z:
> +; AVX:       # BB#0:
> +; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
> +; AVX-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
> +; AVX-NEXT:    retq
> +  %shuffle64 = shufflevector <2 x double> %a, <2 x double>
> zeroinitializer, <2 x i32> <i32 2, i32 1>
> +  %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
> +  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef,
> <4 x i32> <i32 2, i32 3, i32 0, i32 1>
> +  %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
> +  ret <2 x double> %bitcast64
> +}
> +
>  define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
>  ; SSE-LABEL: insert_reg_and_zero_v2i64:
>  ; SSE:       # BB#0:
>
> Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll?rev=231380&r1=231379&r2=231380&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll Thu Mar  5
> 11:14:04 2015
> @@ -1574,6 +1574,23 @@ define <4 x i32> @shuffle_v4i32_0zz3(<4
>    ret <4 x i32> %shuffle
>  }
>
> +define <4 x i32> @shuffle_v4i32_bitcast_0415(<4 x i32> %a, <4 x i32> %b) {
> +; SSE-LABEL: shuffle_v4i32_bitcast_0415:
> +; SSE:       # BB#0:
> +; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
> +; SSE-NEXT:    retq
> +;
> +; AVX-LABEL: shuffle_v4i32_bitcast_0415:
> +; AVX:       # BB#0:
> +; AVX-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
> +; AVX-NEXT:    retq
> +  %shuffle32 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32
> 1, i32 5, i32 0, i32 4>
> +  %bitcast64 = bitcast <4 x i32> %shuffle32 to <2 x double>
> +  %shuffle64 = shufflevector <2 x double> %bitcast64, <2 x double> undef,
> <2 x i32> <i32 1, i32 0>
> +  %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x i32>
> +  ret <4 x i32> %bitcast32
> +}
> +
>  define <4 x i32> @insert_reg_and_zero_v4i32(i32 %a) {
>  ; SSE-LABEL: insert_reg_and_zero_v4i32:
>  ; SSE:       # BB#0:
>
> Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll?rev=231380&r1=231379&r2=231380&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll Thu Mar  5
> 11:14:04 2015
> @@ -922,3 +922,22 @@ define <4 x double> @splat_v4f64(<2 x do
>    %1 = shufflevector <2 x double> %r, <2 x double> undef, <4 x i32>
> zeroinitializer
>    ret <4 x double> %1
>  }
> +
> +define <4 x double> @bitcast_v4f64_0426(<4 x double> %a, <4 x double> %b)
> {
> +; AVX1-LABEL: bitcast_v4f64_0426:
> +; AVX1:       # BB#0:
> +; AVX1-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
> +; AVX1-NEXT:    retq
> +;
> +; AVX2-LABEL: bitcast_v4f64_0426:
> +; AVX2:       # BB#0:
> +; AVX2-NEXT:    vpunpcklqdq  {{.*#+}} ymm0 =
> ymm0[0],ymm1[0],ymm0[2],ymm1[2]
> +; AVX2-NEXT:    retq
> +  %shuffle64 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32>
> <i32 4, i32 0, i32 6, i32 2>
> +  %bitcast32 = bitcast <4 x double> %shuffle64 to <8 x float>
> +  %shuffle32 = shufflevector <8 x float> %bitcast32, <8 x float> undef,
> <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
> +  %bitcast16 = bitcast <8 x float> %shuffle32 to <16 x i16>
> +  %shuffle16 = shufflevector <16 x i16> %bitcast16, <16 x i16> undef, <16
> x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32
> 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
> +  %bitcast64 = bitcast <16 x i16> %shuffle16 to <4 x double>
> +  ret <4 x double> %bitcast64
> +}
>
> Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-mmx.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-mmx.ll?rev=231380&r1=231379&r2=231380&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vector-shuffle-mmx.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-mmx.ll Thu Mar  5 11:14:04
> 2015
> @@ -8,14 +8,14 @@ define void @test0(<1 x i64>* %x) {
>  ; X32:       ## BB#0: ## %entry
>  ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
>  ; X32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
> -; X32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,3]
> +; X32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
>  ; X32-NEXT:    movlpd %xmm0, (%eax)
>  ; X32-NEXT:    retl
>  ;
>  ; X64-LABEL: test0:
>  ; X64:       ## BB#0: ## %entry
>  ; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
> -; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,3]
> +; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
>  ; X64-NEXT:    movq %xmm0, (%rdi)
>  ; X64-NEXT:    retq
>  entry:
> @@ -84,16 +84,15 @@ define void @test2() nounwind {
>  ; X32:       ## BB#0: ## %entry
>  ; X32-NEXT:    movl L_tmp_V2i$non_lazy_ptr, %eax
>  ; X32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
> -; X32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
> -; X32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
> -; X32-NEXT:    movlpd %xmm0, (%eax)
> +; X32-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
> +; X32-NEXT:    movlps %xmm0, (%eax)
>  ; X32-NEXT:    retl
>  ;
>  ; X64-LABEL: test2:
>  ; X64:       ## BB#0: ## %entry
>  ; X64-NEXT:    movq _tmp_V2i@{{.*}}(%rip), %rax
>  ; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
> -; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
> +; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
>  ; X64-NEXT:    movq %xmm0, (%rax)
>  ; X64-NEXT:    retq
>  entry:
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150330/4facb682/attachment.html>