[llvm] r231380 - [DagCombiner] Allow shuffles to merge through bitcasts
Jiangning Liu
liujiangning1 at gmail.com
Sun Mar 29 19:43:58 PDT 2015
Hi Simon,
This commit caused a regression for a small test case, as described in ticket
https://llvm.org/bugs/show_bug.cgi?id=23065.
Thanks,
-Jiangning
2015-03-06 1:14 GMT+08:00 Simon Pilgrim <llvm-dev at redking.me.uk>:
> Author: rksimon
> Date: Thu Mar 5 11:14:04 2015
> New Revision: 231380
>
> URL: http://llvm.org/viewvc/llvm-project?rev=231380&view=rev
> Log:
> [DagCombiner] Allow shuffles to merge through bitcasts
>
> Currently shuffles may only be combined if they are of the same type,
> despite the fact that bitcasts are often introduced in between shuffle
> nodes (e.g. x86 shuffle type widening).
>
> This patch allows a single input shuffle to peek through bitcasts and if
> the input is another shuffle will merge them, shuffling using the smallest
> sized type, and re-applying the bitcasts at the inputs and output instead.
>
> Dropped old ShuffleToZext test - this patch removes the use of the zext
> and vector-zext.ll covers these anyhow.
>
> Differential Revision: http://reviews.llvm.org/D7939
>
> Removed:
> llvm/trunk/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll
> Modified:
> llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll
> llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll
> llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll
> llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
> llvm/trunk/test/CodeGen/X86/vector-shuffle-mmx.ll
>
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=231380&r1=231379&r2=231380&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Thu Mar 5
> 11:14:04 2015
> @@ -11877,6 +11877,89 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE
> return V;
> }
>
> + // If this shuffle only has a single input that is a bitcasted shuffle,
> + // attempt to merge the 2 shuffles and suitably bitcast the
> inputs/output
> + // back to their original types.
> + if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
> + N1.getOpcode() == ISD::UNDEF && Level < AfterLegalizeVectorOps &&
> + TLI.isTypeLegal(VT)) {
> +
> + // Peek through the bitcast only if there is one user.
> + SDValue BC0 = N0;
> + while (BC0.getOpcode() == ISD::BITCAST) {
> + if (!BC0.hasOneUse())
> + break;
> + BC0 = BC0.getOperand(0);
> + }
> +
> + auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
> + if (Scale == 1)
> + return SmallVector<int, 8>(Mask.begin(), Mask.end());
> +
> + SmallVector<int, 8> NewMask;
> + for (int M : Mask)
> + for (int s = 0; s != Scale; ++s)
> + NewMask.push_back(M < 0 ? -1 : Scale * M + s);
> + return NewMask;
> + };
> +
> + if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
> + EVT SVT = VT.getScalarType();
> + EVT InnerVT = BC0->getValueType(0);
> + EVT InnerSVT = InnerVT.getScalarType();
> +
> + // Determine which shuffle works with the smaller scalar type.
> + EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
> + EVT ScaleSVT = ScaleVT.getScalarType();
> +
> + if (TLI.isTypeLegal(ScaleVT) &&
> + 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
> + 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
> +
> + int InnerScale = InnerSVT.getSizeInBits() /
> ScaleSVT.getSizeInBits();
> + int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
> +
> + // Scale the shuffle masks to the smaller scalar type.
> + ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
> + SmallVector<int, 8> InnerMask =
> + ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
> + SmallVector<int, 8> OuterMask =
> + ScaleShuffleMask(SVN->getMask(), OuterScale);
> +
> + // Merge the shuffle masks.
> + SmallVector<int, 8> NewMask;
> + for (int M : OuterMask)
> + NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
> +
> + // Test for shuffle mask legality over both commutations.
> + SDValue SV0 = BC0->getOperand(0);
> + SDValue SV1 = BC0->getOperand(1);
> + bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
> + if (!LegalMask) {
> + for (int i = 0, e = (int)NewMask.size(); i != e; ++i) {
> + int idx = NewMask[i];
> + if (idx < 0)
> + continue;
> + else if (idx < e)
> + NewMask[i] = idx + e;
> + else
> + NewMask[i] = idx - e;
> + }
> + std::swap(SV0, SV1);
> + LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
> + }
> +
> + if (LegalMask) {
> + SV0 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV0);
> + SV1 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV1);
> + return DAG.getNode(
> + ISD::BITCAST, SDLoc(N), VT,
> + DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
> + }
> + }
> + }
> + }
> +
> // Canonicalize shuffles according to rules:
> // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
> // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
>
> Removed: llvm/trunk/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll?rev=231379&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll (removed)
> @@ -1,14 +0,0 @@
> -; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx -mtriple=x86_64-pc-win32 |
> FileCheck %s
> -
> -; CHECK: test
> -; CHECK: vpmovzxwd
> -; CHECK: vpmovzxwd
> -define void @test(<4 x i64> %a, <4 x i16>* %buf) {
> - %ex1 = extractelement <4 x i64> %a, i32 0
> - %ex2 = extractelement <4 x i64> %a, i32 1
> - %x1 = bitcast i64 %ex1 to <4 x i16>
> - %x2 = bitcast i64 %ex2 to <4 x i16>
> - %Sh = shufflevector <4 x i16> %x1, <4 x i16> %x2, <4 x i32> <i32 0, i32
> 1, i32 4, i32 5>
> - store <4 x i16> %Sh, <4 x i16>* %buf, align 1
> - ret void
> -}
>
> Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll?rev=231380&r1=231379&r2=231380&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll Thu Mar 5
> 11:14:04 2015
> @@ -1336,3 +1336,22 @@ define <16 x i8> @shuffle_v16i8_uu_02_03
> %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x
> i32> <i32 undef, i32 2, i32 3, i32 16, i32 undef, i32 6, i32 7, i32 16, i32
> undef, i32 10, i32 11, i32 16, i32 undef, i32 14, i32 15, i32 16>
> ret <16 x i8> %shuffle
> }
> +
> +define <16 x i8> @shuffle_v16i8_bitcast_unpack(<16 x i8> %a, <16 x i8>
> %b) {
> +; SSE-LABEL: shuffle_v16i8_bitcast_unpack:
> +; SSE: # BB#0:
> +; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
> +; SSE-NEXT: retq
> +;
> +; AVX-LABEL: shuffle_v16i8_bitcast_unpack:
> +; AVX: # BB#0:
> +; AVX-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
> +; AVX-NEXT: retq
> + %shuffle8 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32
> 7, i32 23, i32 6, i32 22, i32 5, i32 21, i32 4, i32 20, i32 3, i32 19, i32
> 2, i32 18, i32 1, i32 17, i32 0, i32 16>
> + %bitcast32 = bitcast <16 x i8> %shuffle8 to <4 x float>
> + %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef,
> <4 x i32> <i32 3, i32 2, i32 1, i32 0>
> + %bitcast16 = bitcast <4 x float> %shuffle32 to <8 x i16>
> + %shuffle16 = shufflevector <8 x i16> %bitcast16, <8 x i16> undef, <8 x
> i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
> + %bitcast8 = bitcast <8 x i16> %shuffle16 to <16 x i8>
> + ret <16 x i8> %bitcast8
> +}
>
> Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll?rev=231380&r1=231379&r2=231380&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll Thu Mar 5
> 11:14:04 2015
> @@ -810,6 +810,25 @@ define <2 x double> @shuffle_v2f64_z1(<2
> ret <2 x double> %shuffle
> }
>
> +define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
> +; SSE-LABEL: shuffle_v2f64_bitcast_1z:
> +; SSE: # BB#0:
> +; SSE-NEXT: xorpd %xmm1, %xmm1
> +; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
> +; SSE-NEXT: retq
> +;
> +; AVX-LABEL: shuffle_v2f64_bitcast_1z:
> +; AVX: # BB#0:
> +; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
> +; AVX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
> +; AVX-NEXT: retq
> + %shuffle64 = shufflevector <2 x double> %a, <2 x double>
> zeroinitializer, <2 x i32> <i32 2, i32 1>
> + %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
> + %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef,
> <4 x i32> <i32 2, i32 3, i32 0, i32 1>
> + %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
> + ret <2 x double> %bitcast64
> +}
> +
> define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
> ; SSE-LABEL: insert_reg_and_zero_v2i64:
> ; SSE: # BB#0:
>
> Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll?rev=231380&r1=231379&r2=231380&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll Thu Mar 5
> 11:14:04 2015
> @@ -1574,6 +1574,23 @@ define <4 x i32> @shuffle_v4i32_0zz3(<4
> ret <4 x i32> %shuffle
> }
>
> +define <4 x i32> @shuffle_v4i32_bitcast_0415(<4 x i32> %a, <4 x i32> %b) {
> +; SSE-LABEL: shuffle_v4i32_bitcast_0415:
> +; SSE: # BB#0:
> +; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
> +; SSE-NEXT: retq
> +;
> +; AVX-LABEL: shuffle_v4i32_bitcast_0415:
> +; AVX: # BB#0:
> +; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
> +; AVX-NEXT: retq
> + %shuffle32 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32
> 1, i32 5, i32 0, i32 4>
> + %bitcast64 = bitcast <4 x i32> %shuffle32 to <2 x double>
> + %shuffle64 = shufflevector <2 x double> %bitcast64, <2 x double> undef,
> <2 x i32> <i32 1, i32 0>
> + %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x i32>
> + ret <4 x i32> %bitcast32
> +}
> +
> define <4 x i32> @insert_reg_and_zero_v4i32(i32 %a) {
> ; SSE-LABEL: insert_reg_and_zero_v4i32:
> ; SSE: # BB#0:
>
> Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll?rev=231380&r1=231379&r2=231380&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll Thu Mar 5
> 11:14:04 2015
> @@ -922,3 +922,22 @@ define <4 x double> @splat_v4f64(<2 x do
> %1 = shufflevector <2 x double> %r, <2 x double> undef, <4 x i32>
> zeroinitializer
> ret <4 x double> %1
> }
> +
> +define <4 x double> @bitcast_v4f64_0426(<4 x double> %a, <4 x double> %b)
> {
> +; AVX1-LABEL: bitcast_v4f64_0426:
> +; AVX1: # BB#0:
> +; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
> +; AVX1-NEXT: retq
> +;
> +; AVX2-LABEL: bitcast_v4f64_0426:
> +; AVX2: # BB#0:
> +; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm0 =
> ymm0[0],ymm1[0],ymm0[2],ymm1[2]
> +; AVX2-NEXT: retq
> + %shuffle64 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32>
> <i32 4, i32 0, i32 6, i32 2>
> + %bitcast32 = bitcast <4 x double> %shuffle64 to <8 x float>
> + %shuffle32 = shufflevector <8 x float> %bitcast32, <8 x float> undef,
> <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
> + %bitcast16 = bitcast <8 x float> %shuffle32 to <16 x i16>
> + %shuffle16 = shufflevector <16 x i16> %bitcast16, <16 x i16> undef, <16
> x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32
> 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
> + %bitcast64 = bitcast <16 x i16> %shuffle16 to <4 x double>
> + ret <4 x double> %bitcast64
> +}
>
> Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-mmx.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-mmx.ll?rev=231380&r1=231379&r2=231380&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vector-shuffle-mmx.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-mmx.ll Thu Mar 5 11:14:04
> 2015
> @@ -8,14 +8,14 @@ define void @test0(<1 x i64>* %x) {
> ; X32: ## BB#0: ## %entry
> ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
> ; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
> -; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,3]
> +; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
> ; X32-NEXT: movlpd %xmm0, (%eax)
> ; X32-NEXT: retl
> ;
> ; X64-LABEL: test0:
> ; X64: ## BB#0: ## %entry
> ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
> -; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,3]
> +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
> ; X64-NEXT: movq %xmm0, (%rdi)
> ; X64-NEXT: retq
> entry:
> @@ -84,16 +84,15 @@ define void @test2() nounwind {
> ; X32: ## BB#0: ## %entry
> ; X32-NEXT: movl L_tmp_V2i$non_lazy_ptr, %eax
> ; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
> -; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
> -; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
> -; X32-NEXT: movlpd %xmm0, (%eax)
> +; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
> +; X32-NEXT: movlps %xmm0, (%eax)
> ; X32-NEXT: retl
> ;
> ; X64-LABEL: test2:
> ; X64: ## BB#0: ## %entry
> ; X64-NEXT: movq _tmp_V2i@{{.*}}(%rip), %rax
> ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
> -; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
> +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
> ; X64-NEXT: movq %xmm0, (%rax)
> ; X64-NEXT: retq
> entry:
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150330/4facb682/attachment.html>
More information about the llvm-commits mailing list