[llvm] r374579 - [X86][SSE] Add support for v4i8 add reduction

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sat Oct 12 04:58:49 PDT 2019


Definitely unnecessary, but this is coming from a scalar_to_vector 
unfortunately.

I'm going to investigate doing an explicit zextload from v4i8 to v4i32 
and then performing the reduction as a v16i8 reduction.

Simon.

On 11/10/2019 19:51, Craig Topper wrote:
> Why do the load cases use a pmovzxdq after the movd? That seems 
> unnecessary. The movd should have generated 0s already.
>
> ~Craig
>
>
> On Fri, Oct 11, 2019 at 10:51 AM Simon Pilgrim via llvm-commits 
> <llvm-commits at lists.llvm.org <mailto:llvm-commits at lists.llvm.org>> wrote:
>
>     Author: rksimon
>     Date: Fri Oct 11 10:54:15 2019
>     New Revision: 374579
>
>     URL: http://llvm.org/viewvc/llvm-project?rev=374579&view=rev
>     Log:
>     [X86][SSE] Add support for v4i8 add reduction
>
>     Modified:
>         llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>         llvm/trunk/test/CodeGen/X86/vector-reduce-add.ll
>
>     Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>     URL:
>     http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=374579&r1=374578&r2=374579&view=diff
>     ==============================================================================
>     --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
>     +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Oct 11 10:54:15 2019
>     @@ -36239,10 +36239,15 @@ static SDValue combineReductionToHorizon
>
>        SDLoc DL(ExtElt);
>
>     -  if (VecVT == MVT::v8i8) {
>     +  // vXi8 reduction - sub 128-bit vector.
>     +  if (VecVT == MVT::v4i8 || VecVT == MVT::v8i8) {
>     +    // Pad with zero.
>     +    if (VecVT == MVT::v4i8)
>     +      Rdx = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i8, Rdx,
>     +                        DAG.getConstant(0, DL, VecVT));
>          // Pad with undef.
>          Rdx = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, Rdx,
>     -                      DAG.getUNDEF(VecVT));
>     +                      DAG.getUNDEF(MVT::v8i8));
>          Rdx = DAG.getNode(X86ISD::PSADBW, DL, MVT::v2i64, Rdx,
>                            DAG.getConstant(0, DL, MVT::v16i8));
>          Rdx = DAG.getBitcast(MVT::v16i8, Rdx);
>
>     Modified: llvm/trunk/test/CodeGen/X86/vector-reduce-add.ll
>     URL:
>     http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-reduce-add.ll?rev=374579&r1=374578&r2=374579&view=diff
>     ==============================================================================
>     --- llvm/trunk/test/CodeGen/X86/vector-reduce-add.ll (original)
>     +++ llvm/trunk/test/CodeGen/X86/vector-reduce-add.ll Fri Oct 11 10:54:15 2019
>     @@ -1029,44 +1029,36 @@ define i8 @test_v2i8_load(<2 x i8>* %p)
>      define i8 @test_v4i8(<4 x i8> %a0) {
>      ; SSE2-LABEL: test_v4i8:
>      ; SSE2:       # %bb.0:
>     -; SSE2-NEXT:    movdqa %xmm0, %xmm1
>     -; SSE2-NEXT:    psrld $16, %xmm1
>     -; SSE2-NEXT:    paddb %xmm0, %xmm1
>     -; SSE2-NEXT:    movdqa %xmm1, %xmm0
>     -; SSE2-NEXT:    psrlw $8, %xmm0
>     -; SSE2-NEXT:    paddb %xmm1, %xmm0
>     +; SSE2-NEXT:    pxor %xmm1, %xmm1
>     +; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
>     +; SSE2-NEXT:    psadbw %xmm1, %xmm0
>      ; SSE2-NEXT:    movd %xmm0, %eax
>      ; SSE2-NEXT:    # kill: def $al killed $al killed $eax
>      ; SSE2-NEXT:    retq
>      ;
>      ; SSE41-LABEL: test_v4i8:
>      ; SSE41:       # %bb.0:
>     -; SSE41-NEXT:    movdqa %xmm0, %xmm1
>     -; SSE41-NEXT:    psrld $16, %xmm1
>     -; SSE41-NEXT:    paddb %xmm0, %xmm1
>     -; SSE41-NEXT:    movdqa %xmm1, %xmm0
>     -; SSE41-NEXT:    psrlw $8, %xmm0
>     -; SSE41-NEXT:    paddb %xmm1, %xmm0
>     -; SSE41-NEXT:    pextrb $0, %xmm0, %eax
>     +; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
>     +; SSE41-NEXT:    pxor %xmm1, %xmm1
>     +; SSE41-NEXT:    psadbw %xmm0, %xmm1
>     +; SSE41-NEXT:    pextrb $0, %xmm1, %eax
>      ; SSE41-NEXT:    # kill: def $al killed $al killed $eax
>      ; SSE41-NEXT:    retq
>      ;
>      ; AVX-LABEL: test_v4i8:
>      ; AVX:       # %bb.0:
>     -; AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
>     -; AVX-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
>     -; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
>     -; AVX-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
>     +; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
>     +; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
>     +; AVX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
>      ; AVX-NEXT:    vpextrb $0, %xmm0, %eax
>      ; AVX-NEXT:    # kill: def $al killed $al killed $eax
>      ; AVX-NEXT:    retq
>      ;
>      ; AVX512-LABEL: test_v4i8:
>      ; AVX512:       # %bb.0:
>     -; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
>     -; AVX512-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
>     -; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
>     -; AVX512-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
>     +; AVX512-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
>     +; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
>     +; AVX512-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
>      ; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
>      ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
>      ; AVX512-NEXT:    retq
>     @@ -1078,36 +1070,28 @@ define i8 @test_v4i8_load(<4 x i8>* %p)
>      ; SSE2-LABEL: test_v4i8_load:
>      ; SSE2:       # %bb.0:
>      ; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
>     -; SSE2-NEXT:    movdqa %xmm0, %xmm1
>     -; SSE2-NEXT:    psrld $16, %xmm1
>     -; SSE2-NEXT:    paddb %xmm0, %xmm1
>     -; SSE2-NEXT:    movdqa %xmm1, %xmm0
>     -; SSE2-NEXT:    psrlw $8, %xmm0
>     -; SSE2-NEXT:    paddb %xmm1, %xmm0
>     -; SSE2-NEXT:    movd %xmm0, %eax
>     +; SSE2-NEXT:    pxor %xmm1, %xmm1
>     +; SSE2-NEXT:    psadbw %xmm0, %xmm1
>     +; SSE2-NEXT:    movd %xmm1, %eax
>      ; SSE2-NEXT:    # kill: def $al killed $al killed $eax
>      ; SSE2-NEXT:    retq
>      ;
>      ; SSE41-LABEL: test_v4i8_load:
>      ; SSE41:       # %bb.0:
>      ; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
>     -; SSE41-NEXT:    movdqa %xmm0, %xmm1
>     -; SSE41-NEXT:    psrld $16, %xmm1
>     -; SSE41-NEXT:    paddb %xmm0, %xmm1
>     -; SSE41-NEXT:    movdqa %xmm1, %xmm0
>     -; SSE41-NEXT:    psrlw $8, %xmm0
>     -; SSE41-NEXT:    paddb %xmm1, %xmm0
>     -; SSE41-NEXT:    pextrb $0, %xmm0, %eax
>     +; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
>     +; SSE41-NEXT:    pxor %xmm1, %xmm1
>     +; SSE41-NEXT:    psadbw %xmm0, %xmm1
>     +; SSE41-NEXT:    pextrb $0, %xmm1, %eax
>      ; SSE41-NEXT:    # kill: def $al killed $al killed $eax
>      ; SSE41-NEXT:    retq
>      ;
>      ; AVX-LABEL: test_v4i8_load:
>      ; AVX:       # %bb.0:
>      ; AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
>     -; AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
>     -; AVX-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
>     -; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
>     -; AVX-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
>     +; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
>     +; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
>     +; AVX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
>      ; AVX-NEXT:    vpextrb $0, %xmm0, %eax
>      ; AVX-NEXT:    # kill: def $al killed $al killed $eax
>      ; AVX-NEXT:    retq
>     @@ -1115,10 +1099,9 @@ define i8 @test_v4i8_load(<4 x i8>* %p)
>      ; AVX512-LABEL: test_v4i8_load:
>      ; AVX512:       # %bb.0:
>      ; AVX512-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
>     -; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
>     -; AVX512-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
>     -; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
>     -; AVX512-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
>     +; AVX512-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
>     +; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
>     +; AVX512-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
>      ; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
>      ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
>      ; AVX512-NEXT:    retq
>
>
>     _______________________________________________
>     llvm-commits mailing list
>     llvm-commits at lists.llvm.org <mailto:llvm-commits at lists.llvm.org>
>     https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20191012/be497c34/attachment.html>


More information about the llvm-commits mailing list