[llvm] r341704 - [X86] Don't create X86ISD::AVG nodes from v1iX vectors.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 7 13:56:01 PDT 2018
Author: ctopper
Date: Fri Sep 7 13:56:01 2018
New Revision: 341704
URL: http://llvm.org/viewvc/llvm-project?rev=341704&view=rev
Log:
[X86] Don't create X86ISD::AVG nodes from v1iX vectors.
The type legalizer will try to scalarize this and fail.
It looks like there are some other v1iX oddities out there too, since we still generate some vector instructions for this case.
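
For reference, a minimal sketch (not part of this commit; the function name @avg_v16i8_sketch is made up for illustration, modeled on the existing avg.ll tests) of the same pattern at a width detectAVGPattern is meant to accept. A <16 x i8> version like this is expected to lower to a single pavgb under SSE2, whereas the <1 x i8> variant added below now skips the X86ISD::AVG path, which the type legalizer would otherwise have to scalarize:

  ; rounded average: (zext(x) + zext(y) + 1) >> 1, truncated back to i8
  define <16 x i8> @avg_v16i8_sketch(<16 x i8> %x, <16 x i8> %y) {
    %a = zext <16 x i8> %x to <16 x i16>
    %b = zext <16 x i8> %y to <16 x i16>
    %c = add <16 x i16> %a, %b
    %d = add <16 x i16> %c, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
    %e = lshr <16 x i16> %d, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
    %f = trunc <16 x i16> %e to <16 x i8>
    ret <16 x i8> %f
  }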
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avg.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=341704&r1=341703&r2=341704&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Sep 7 13:56:01 2018
@@ -35956,7 +35956,7 @@ static SDValue detectAVGPattern(SDValue
EVT ScalarVT = VT.getVectorElementType();
if (!((ScalarVT == MVT::i8 || ScalarVT == MVT::i16) &&
- isPowerOf2_32(NumElems)))
+ NumElems >= 2 && isPowerOf2_32(NumElems)))
return SDValue();
// InScalarVT is the intermediate type in AVG pattern and it should be greater
Modified: llvm/trunk/test/CodeGen/X86/avg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avg.ll?rev=341704&r1=341703&r2=341704&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avg.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avg.ll Fri Sep 7 13:56:01 2018
@@ -2896,3 +2896,43 @@ define void @not_avg_v16i8_wide_constant
store <16 x i8> %8, <16 x i8>* undef, align 4
ret void
}
+
+; Make sure we don't fail on single element vectors.
+define <1 x i8> @avg_v1i8(<1 x i8> %x, <1 x i8> %y) {
+; SSE2-LABEL: avg_v1i8:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movd %edi, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: movd %esi, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: movd %xmm0, %ecx
+; SSE2-NEXT: leal 1(%rax,%rcx), %eax
+; SSE2-NEXT: shrl %eax
+; SSE2-NEXT: # kill: def $al killed $al killed $eax
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: avg_v1i8:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovd %edi, %xmm0
+; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX-NEXT: vmovd %xmm0, %eax
+; AVX-NEXT: vmovd %esi, %xmm0
+; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX-NEXT: vmovd %xmm0, %ecx
+; AVX-NEXT: leal 1(%rax,%rcx), %eax
+; AVX-NEXT: shrl %eax
+; AVX-NEXT: # kill: def $al killed $al killed $eax
+; AVX-NEXT: retq
+ %a = zext <1 x i8> %x to <1 x i16>
+ %b = zext <1 x i8> %y to <1 x i16>
+ %c = add <1 x i16> %a, %b
+ %d = add <1 x i16> %c, <i16 1>
+ %e = lshr <1 x i16> %d, <i16 1>
+ %f = trunc <1 x i16> %e to <1 x i8>
+ ret <1 x i8> %f
+}
+