[llvm] r341704 - [X86] Don't create X86ISD::AVG nodes from v1iX vectors.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 7 13:56:01 PDT 2018
Author: ctopper
Date: Fri Sep 7 13:56:01 2018
New Revision: 341704
URL: http://llvm.org/viewvc/llvm-project?rev=341704&view=rev
Log:
[X86] Don't create X86ISD::AVG nodes from v1iX vectors.
The type legalizer will try to scalarize this and fail.
It looks like there are some other v1iX oddities out there too, since we still generate some vector instructions for this case.
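
For reference, a minimal sketch (not part of this commit; the function name @avg_v16i8_sketch is made up for illustration, modeled on the existing avg.ll tests) of the same pattern at a width detectAVGPattern is meant to accept. A <16 x i8> version like this is expected to lower to a single pavgb under SSE2, whereas the <1 x i8> variant added below now skips the X86ISD::AVG path, which the type legalizer would otherwise have to scalarize:

  ; rounded average: (zext(x) + zext(y) + 1) >> 1, truncated back to i8
  define <16 x i8> @avg_v16i8_sketch(<16 x i8> %x, <16 x i8> %y) {
    %a = zext <16 x i8> %x to <16 x i16>
    %b = zext <16 x i8> %y to <16 x i16>
    %c = add <16 x i16> %a, %b
    %d = add <16 x i16> %c, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
    %e = lshr <16 x i16> %d, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
    %f = trunc <16 x i16> %e to <16 x i8>
    ret <16 x i8> %f
  }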
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avg.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=341704&r1=341703&r2=341704&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Sep 7 13:56:01 2018
@@ -35956,7 +35956,7 @@ static SDValue detectAVGPattern(SDValue
EVT ScalarVT = VT.getVectorElementType();
if (!((ScalarVT == MVT::i8 || ScalarVT == MVT::i16) &&
- isPowerOf2_32(NumElems)))
+ NumElems >= 2 && isPowerOf2_32(NumElems)))
return SDValue();
// InScalarVT is the intermediate type in AVG pattern and it should be greater
Modified: llvm/trunk/test/CodeGen/X86/avg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avg.ll?rev=341704&r1=341703&r2=341704&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avg.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avg.ll Fri Sep 7 13:56:01 2018
@@ -2896,3 +2896,43 @@ define void @not_avg_v16i8_wide_constant
store <16 x i8> %8, <16 x i8>* undef, align 4
ret void
}
+
+; Make sure we don't fail on single element vectors.
+define <1 x i8> @avg_v1i8(<1 x i8> %x, <1 x i8> %y) {
+; SSE2-LABEL: avg_v1i8:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movd %edi, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: movd %esi, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: movd %xmm0, %ecx
+; SSE2-NEXT: leal 1(%rax,%rcx), %eax
+; SSE2-NEXT: shrl %eax
+; SSE2-NEXT: # kill: def $al killed $al killed $eax
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: avg_v1i8:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovd %edi, %xmm0
+; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX-NEXT: vmovd %xmm0, %eax
+; AVX-NEXT: vmovd %esi, %xmm0
+; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX-NEXT: vmovd %xmm0, %ecx
+; AVX-NEXT: leal 1(%rax,%rcx), %eax
+; AVX-NEXT: shrl %eax
+; AVX-NEXT: # kill: def $al killed $al killed $eax
+; AVX-NEXT: retq
+ %a = zext <1 x i8> %x to <1 x i16>
+ %b = zext <1 x i8> %y to <1 x i16>
+ %c = add <1 x i16> %a, %b
+ %d = add <1 x i16> %c, <i16 1>
+ %e = lshr <1 x i16> %d, <i16 1>
+ %f = trunc <1 x i16> %e to <1 x i8>
+ ret <1 x i8> %f
+}
+