[llvm] r324782 - [X86] Teach lower1BitVectorShuffle to recognize shuffles that are just filling upper elements with zero. Replace with insert_subvector.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 9 15:32:27 PST 2018


Author: ctopper
Date: Fri Feb  9 15:32:27 2018
New Revision: 324782

URL: http://llvm.org/viewvc/llvm-project?rev=324782&view=rev
Log:
[X86] Teach lower1BitVectorShuffle to recognize shuffles that are just filling upper elements with zero. Replace with insert_subvector.

There's still some extra kshifts in one of the modified test cases here, but hopefully that's only a DAG combine away.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=324782&r1=324781&r2=324782&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Feb  9 15:32:27 2018
@@ -14414,8 +14414,36 @@ static SDValue lower512BitVectorShuffle(
 // vector, shuffle and then truncate it back.
 static SDValue lower1BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                       MVT VT, SDValue V1, SDValue V2,
+                                      const APInt &Zeroable,
                                       const X86Subtarget &Subtarget,
                                       SelectionDAG &DAG) {
+  unsigned NumElts = Mask.size();
+
+  // Try to recognize shuffles that are just padding a subvector with zeros.
+  unsigned SubvecElts = 0;
+  for (int i = 0; i != (int)NumElts; ++i) {
+    if (Mask[i] >= 0 && Mask[i] != i)
+      break;
+
+    ++SubvecElts;
+  }
+  assert(SubvecElts != NumElts && "Identity shuffle?");
+
+  // Clip to a power 2.
+  SubvecElts = PowerOf2Floor(SubvecElts);
+
+  // Make sure the number of zeroable bits in the top at least covers the bits
+  // not covered by the subvector.
+  if (Zeroable.countLeadingOnes() >= (NumElts - SubvecElts)) {
+    MVT ExtractVT = MVT::getVectorVT(MVT::i1, SubvecElts);
+    SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT,
+                                  V1, DAG.getIntPtrConstant(0, DL));
+    return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
+                       getZeroVector(VT, Subtarget, DAG, DL),
+                       Extract, DAG.getIntPtrConstant(0, DL));
+  }
+
+
   assert(Subtarget.hasAVX512() &&
          "Cannot lower 512-bit vectors w/o basic ISA!");
   MVT ExtVT;
@@ -14624,7 +14652,8 @@ static SDValue lowerVectorShuffle(SDValu
                                     DAG);
 
   if (Is1BitVector)
-    return lower1BitVectorShuffle(DL, Mask, VT, V1, V2, Subtarget, DAG);
+    return lower1BitVectorShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget,
+                                  DAG);
 
   llvm_unreachable("Unimplemented!");
 }

Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=324782&r1=324781&r2=324782&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Fri Feb  9 15:32:27 2018
@@ -2757,19 +2757,16 @@ define <8 x i64> @mask_widening(<2 x i64
 ; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
 ; KNL-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
 ; KNL-NEXT:    kshiftlw $12, %k0, %k0
-; KNL-NEXT:    kshiftrw $12, %k0, %k1
-; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; KNL-NEXT:    vmovdqa %ymm0, %ymm0
-; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
+; KNL-NEXT:    kshiftrw $12, %k0, %k0
+; KNL-NEXT:    kshiftlw $8, %k0, %k0
+; KNL-NEXT:    kshiftrw $8, %k0, %k1
 ; KNL-NEXT:    vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: mask_widening:
 ; SKX:       ## %bb.0: ## %entry
 ; SKX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0
-; SKX-NEXT:    vpmovm2d %k0, %zmm0
-; SKX-NEXT:    vmovdqa %ymm0, %ymm0
-; SKX-NEXT:    vpmovd2m %zmm0, %k1
+; SKX-NEXT:    kmovb %k0, %k1
 ; SKX-NEXT:    vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
 ; SKX-NEXT:    retq
 ;
@@ -2779,10 +2776,9 @@ define <8 x i64> @mask_widening(<2 x i64
 ; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512BW-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
 ; AVX512BW-NEXT:    kshiftlw $12, %k0, %k0
-; AVX512BW-NEXT:    kshiftrw $12, %k0, %k1
-; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512BW-NEXT:    vmovdqa %ymm0, %ymm0
-; AVX512BW-NEXT:    vptestmd %zmm0, %zmm0, %k1
+; AVX512BW-NEXT:    kshiftrw $12, %k0, %k0
+; AVX512BW-NEXT:    kshiftlw $8, %k0, %k0
+; AVX512BW-NEXT:    kshiftrw $8, %k0, %k1
 ; AVX512BW-NEXT:    vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
 ; AVX512BW-NEXT:    retq
 ;
@@ -2793,9 +2789,7 @@ define <8 x i64> @mask_widening(<2 x i64
 ; AVX512DQ-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
 ; AVX512DQ-NEXT:    kshiftlb $4, %k0, %k0
 ; AVX512DQ-NEXT:    kshiftrb $4, %k0, %k0
-; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
-; AVX512DQ-NEXT:    vmovdqa %ymm0, %ymm0
-; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k1
+; AVX512DQ-NEXT:    kmovb %k0, %k1
 ; AVX512DQ-NEXT:    vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
 ; AVX512DQ-NEXT:    retq
 entry:




More information about the llvm-commits mailing list