[llvm] r221684 - [X86] Add missing check for 'isINSERTPSMask' in method 'isShuffleMaskLegal'.

Wed Nov 12 13:39:23 PST 2014

It's rather odd that a lot of those swizzle tests only got optimised because of the addition of the isINSERTPSMask check.

By the looks of it, isSHUFPMask only matches shufps/shufpd with 2 distinct inputs - it doesn’t recognise that it can be used with the same input for both arguments. Similarly, isSHUFDMask only matches against the first input. In fact, most of the mask matching functions don’t seem to be good at dealing with more general input cases.

Most of this would be trivial to fix but would involve some refactoring of the old vector shuffle lowering code - is this worth doing now, or should it wait until the old code gets removed?

Simon.

On 11 Nov 2014, at 11:20, Andrea Di Biagio <Andrea_DiBiagio at sn.scee.net> wrote:

> Author: adibiagio
> Date: Tue Nov 11 05:20:31 2014
> New Revision: 221684
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=221684&view=rev
> Log:
> [X86] Add missing check for 'isINSERTPSMask' in method 'isShuffleMaskLegal'.
> 
> This helps the DAGCombiner to identify more opportunities to fold shuffles.
> 
> Modified:
>    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>    llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll
> 
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=221684&r1=221683&r2=221684&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Nov 11 05:20:31 2014
> @@ -19482,7 +19482,8 @@ X86TargetLowering::isShuffleMaskLegal(co
>           isUNPCKHMask(M, SVT, Subtarget->hasInt256()) ||
>           isUNPCKL_v_undef_Mask(M, SVT, Subtarget->hasInt256()) ||
>           isUNPCKH_v_undef_Mask(M, SVT, Subtarget->hasInt256()) ||
> -          isBlendMask(M, SVT, Subtarget->hasSSE41(), Subtarget->hasInt256()));
> +          isBlendMask(M, SVT, Subtarget->hasSSE41(), Subtarget->hasInt256()) ||
> +          (Subtarget->hasSSE41() && isINSERTPSMask(M, SVT)));
> }
> 
> bool
> 
> Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll?rev=221684&r1=221683&r2=221684&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll Tue Nov 11 05:20:31 2014
> @@ -1621,17 +1621,13 @@ define <4 x float> @combine_test1b(<4 x
> ;
> ; SSE41-LABEL: combine_test1b:
> ; SSE41:       # BB#0:
> -; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
> -; SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
> -; SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,0]
> +; SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1,2,0]
> ; SSE41-NEXT:    movaps %xmm1, %xmm0
> ; SSE41-NEXT:    retq
> ;
> ; AVX-LABEL: combine_test1b:
> ; AVX:       # BB#0:
> -; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
> -; AVX-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
> -; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[2,0],xmm0[2,0]
> +; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm1[0,1,2,0]
> ; AVX-NEXT:    retq
>   %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
>   %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 0>
> @@ -1722,17 +1718,13 @@ define <4 x float> @combine_test4b(<4 x
> ;
> ; SSE41-LABEL: combine_test4b:
> ; SSE41:       # BB#0:
> -; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
> -; SSE41-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[3,0]
> -; SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[0,2]
> +; SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
> ; SSE41-NEXT:    movaps %xmm1, %xmm0
> ; SSE41-NEXT:    retq
> ;
> ; AVX-LABEL: combine_test4b:
> ; AVX:       # BB#0:
> -; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
> -; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[3,0]
> -; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[1,1],xmm0[0,2]
> +; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm1[1,1,2,3]
> ; AVX-NEXT:    retq
>   %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
>   %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 5, i32 5, i32 2, i32 7>
> @@ -2565,3 +2557,63 @@ define <8 x i32> @combine_unneeded_subve
>   %d = shufflevector <8 x i32> %b, <8 x i32> %c, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
>   ret <8 x i32> %d
> }
> +
> +define <4 x float> @combine_insertps1(<4 x float> %a, <4 x float> %b) {
> +; SSE41-LABEL: combine_insertps1:
> +; SSE41:       # BB#0:
> +; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm1[2],xmm0[1,2,3]
> +; SSE41-NEXT:    retq
> +
> +; AVX-LABEL: combine_insertps1:
> +; AVX:       # BB#0:
> +; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[2],xmm0[1,2,3]
> +; AVX-NEXT:    retq
> +  %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32><i32 0, i32 6, i32 2, i32 4>
> +  %d = shufflevector <4 x float> %a, <4 x float> %c, <4 x i32> <i32 5, i32 1, i32 6, i32 3>
> +  ret <4 x float> %d
> +}
> +
> +define <4 x float> @combine_insertps2(<4 x float> %a, <4 x float> %b) {
> +; SSE41-LABEL: combine_insertps2:
> +; SSE41:       # BB#0:
> +; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[2],xmm0[2,3]
> +; SSE41-NEXT:    retq
> +
> +; AVX-LABEL: combine_insertps2:
> +; AVX:       # BB#0:
> +; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[2],xmm0[2,3]
> +; AVX-NEXT:    retq
> +  %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32><i32 0, i32 1, i32 6, i32 7>
> +  %d = shufflevector <4 x float> %a, <4 x float> %c, <4 x i32> <i32 4, i32 6, i32 2, i32 3>
> +  ret <4 x float> %d
> +}
> +
> +define <4 x float> @combine_insertps3(<4 x float> %a, <4 x float> %b) {
> +; SSE41-LABEL: combine_insertps3:
> +; SSE41:       # BB#0:
> +; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
> +; SSE41-NEXT:    retq
> +
> +; AVX-LABEL: combine_insertps3:
> +; AVX:       # BB#0:
> +; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
> +; AVX-NEXT:    retq
> +  %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32><i32 0, i32 4, i32 2, i32 5>
> +  %d = shufflevector <4 x float> %a, <4 x float> %c, <4 x i32><i32 4, i32 1, i32 5, i32 3>
> +  ret <4 x float> %d
> +}
> +
> +define <4 x float> @combine_insertps4(<4 x float> %a, <4 x float> %b) {
> +; SSE41-LABEL: combine_insertps4:
> +; SSE41:       # BB#0:
> +; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
> +; SSE41-NEXT:    retq
> +
> +; AVX-LABEL: combine_insertps4:
> +; AVX:       # BB#0:
> +; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
> +; AVX-NEXT:    retq
> +  %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32><i32 0, i32 4, i32 2, i32 5>
> +  %d = shufflevector <4 x float> %a, <4 x float> %c, <4 x i32><i32 4, i32 1, i32 6, i32 5>
> +  ret <4 x float> %d
> +}
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits