[llvm] r369226 - [X86] Teach lower1BitShuffle to recognize padding a subvector with zeros with V2 as the source and V1 as the zero vector.

Sun Aug 18 17:39:22 PDT 2019

Author: ctopper
Date: Sun Aug 18 17:39:22 2019
New Revision: 369226

URL: http://llvm.org/viewvc/llvm-project?rev=369226&view=rev
Log:
[X86] Teach lower1BitShuffle to recognize padding a subvector with zeros with V2 as the source and V1 as the zero vector.

Shuffle canonicalization can swap the sources so the zero vector
might be V1 and the subvector that's being padded can be V2.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=369226&r1=369225&r2=369226&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Aug 18 17:39:22 2019
@@ -16551,13 +16551,20 @@ static SDValue lower1BitShuffle(const SD
   assert(Subtarget.hasAVX512() &&
          "Cannot lower 512-bit vectors w/o basic ISA!");
 
-  unsigned NumElts = Mask.size();
+  int NumElts = Mask.size();
 
   // Try to recognize shuffles that are just padding a subvector with zeros.
-  unsigned SubvecElts = 0;
-  for (int i = 0; i != (int)NumElts; ++i) {
-    if (Mask[i] >= 0 && Mask[i] != i)
-      break;
+  int SubvecElts = 0;
+  int Src = -1;
+  for (int i = 0; i != NumElts; ++i) {
+    if (Mask[i] >= 0) {
+      // Grab the source from the first valid mask. All subsequent elements need
+      // to use this same source.
+      if (Src < 0)
+        Src = Mask[i] / NumElts;
+      if (Src != (Mask[i] / NumElts) || (Mask[i] % NumElts) != i)
+        break;
+    }
 
     ++SubvecElts;
   }
@@ -16568,10 +16575,12 @@ static SDValue lower1BitShuffle(const SD
 
   // Make sure the number of zeroable bits in the top at least covers the bits
   // not covered by the subvector.
-  if (Zeroable.countLeadingOnes() >= (NumElts - SubvecElts)) {
+  if ((int)Zeroable.countLeadingOnes() >= (NumElts - SubvecElts)) {
+    assert(Src >= 0 && "Expected a source!");
     MVT ExtractVT = MVT::getVectorVT(MVT::i1, SubvecElts);
     SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT,
-                                  V1, DAG.getIntPtrConstant(0, DL));
+                                  Src == 0 ? V1 : V2,
+                                  DAG.getIntPtrConstant(0, DL));
     return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
                        DAG.getConstant(0, DL, VT),
                        Extract, DAG.getIntPtrConstant(0, DL));

Modified: llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll?rev=369226&r1=369225&r2=369226&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll Sun Aug 18 17:39:22 2019
@@ -205,12 +205,8 @@ define i8 @test15(<2 x i64> %x) {
 ; CHECK-LABEL: test15:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k0
-; CHECK-NEXT:    vpmovm2d %k0, %ymm0
-; CHECK-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
-; CHECK-NEXT:    vpmovd2m %ymm0, %k0
 ; CHECK-NEXT:    kmovd %k0, %eax
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
-; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
   %a = icmp eq <2 x i64> %x, zeroinitializer
   %b = shufflevector <2 x i1> %a, <2 x i1> <i1 false, i1 undef>, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>