[llvm] r369215 - [X86] Improve lower1BitShuffle handling for KSHIFTL on narrow vectors.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sun Aug 18 11:52:46 PDT 2019


Author: ctopper
Date: Sun Aug 18 11:52:46 2019
New Revision: 369215

URL: http://llvm.org/viewvc/llvm-project?rev=369215&view=rev
Log:
[X86] Improve lower1BitShuffle handling for KSHIFTL on narrow vectors.

We can insert the value into a larger legal type and shift that
by the desired amount.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/kshift.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=369215&r1=369214&r2=369215&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Aug 18 11:52:46 2019
@@ -16562,20 +16562,36 @@ static SDValue lower1BitShuffle(const SD
   }
 
   // Try to match KSHIFTs.
-  // TODO: Support narrower than legal shifts by widening and extracting.
-  if (NumElts >= 16 || (Subtarget.hasDQI() && NumElts == 8)) {
-    unsigned Offset = 0;
-    for (SDValue V : { V1, V2 }) {
-      unsigned Opcode;
-      int ShiftAmt = match1BitShuffleAsKSHIFT(Opcode, Mask, Offset, Zeroable);
-      if (ShiftAmt >= 0)
+  unsigned Offset = 0;
+  for (SDValue V : { V1, V2 }) {
+    unsigned Opcode;
+    int ShiftAmt = match1BitShuffleAsKSHIFT(Opcode, Mask, Offset, Zeroable);
+    if (ShiftAmt >= 0) {
+      // FIXME: We can't easily widen an illegal right shift if we need to shift
+      // in zeroes.
+      if (Opcode == X86ISD::KSHIFTR &&
+          (NumElts >= 16 || (Subtarget.hasDQI() && NumElts == 8)))
         return DAG.getNode(Opcode, DL, VT, V,
                            DAG.getConstant(ShiftAmt, DL, MVT::i8));
-      Offset += NumElts; // Increment for next iteration.
+      if (Opcode == X86ISD::KSHIFTL) {
+        // If this is a shift left we can widen the VT to a suported kshiftl.
+        MVT WideVT = VT;
+        if ((!Subtarget.hasDQI() && NumElts == 8) || NumElts < 8)
+          WideVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
+        SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT,
+                                  DAG.getUNDEF(WideVT), V,
+                                  DAG.getIntPtrConstant(0, DL));
+        Res = DAG.getNode(Opcode, DL, WideVT, V,
+                          DAG.getConstant(ShiftAmt, DL, MVT::i8));
+        return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
+                           DAG.getIntPtrConstant(0, DL));
+      }
     }
+    Offset += NumElts; // Increment for next iteration.
   }
 
 
+
   MVT ExtVT;
   switch (VT.SimpleTy) {
   default:

Modified: llvm/trunk/test/CodeGen/X86/kshift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/kshift.ll?rev=369215&r1=369214&r2=369215&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/kshift.ll (original)
+++ llvm/trunk/test/CodeGen/X86/kshift.ll Sun Aug 18 11:52:46 2019
@@ -5,13 +5,9 @@
 define i8 @kshiftl_v8i1_1(<8 x i64> %x, <8 x i64> %y) {
 ; KNL-LABEL: kshiftl_v8i1_1:
 ; KNL:       # %bb.0:
-; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k1
-; KNL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; KNL-NEXT:    movb $-2, %al
-; KNL-NEXT:    kmovw %eax, %k1
-; KNL-NEXT:    vpexpandq %zmm0, %zmm0 {%k1} {z}
-; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k1
-; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
+; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
+; KNL-NEXT:    kshiftlw $1, %k0, %k1
+; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k0 {%k1}
 ; KNL-NEXT:    kmovw %k0, %eax
 ; KNL-NEXT:    # kill: def $al killed $al killed $eax
 ; KNL-NEXT:    vzeroupper
@@ -177,13 +173,9 @@ define i64 @kshiftl_v64i1_1(<64 x i8> %x
 define i8 @kshiftl_v8i1_7(<8 x i64> %x, <8 x i64> %y) {
 ; KNL-LABEL: kshiftl_v8i1_7:
 ; KNL:       # %bb.0:
-; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k1
-; KNL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; KNL-NEXT:    movb $-128, %al
-; KNL-NEXT:    kmovw %eax, %k1
-; KNL-NEXT:    vpexpandq %zmm0, %zmm0 {%k1} {z}
-; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k1
-; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
+; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
+; KNL-NEXT:    kshiftlw $7, %k0, %k1
+; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k0 {%k1}
 ; KNL-NEXT:    kmovw %k0, %eax
 ; KNL-NEXT:    # kill: def $al killed $al killed $eax
 ; KNL-NEXT:    vzeroupper
@@ -475,13 +467,9 @@ define i64 @kshiftr_v64i1_1(<64 x i8> %x
 define i8 @kshiftr_v8i1_7(<8 x i64> %x, <8 x i64> %y) {
 ; KNL-LABEL: kshiftr_v8i1_7:
 ; KNL:       # %bb.0:
-; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k1
-; KNL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; KNL-NEXT:    movb $-2, %al
-; KNL-NEXT:    kmovw %eax, %k1
-; KNL-NEXT:    vpexpandq %zmm0, %zmm0 {%k1} {z}
-; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k1
-; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
+; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
+; KNL-NEXT:    kshiftlw $1, %k0, %k1
+; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k0 {%k1}
 ; KNL-NEXT:    kmovw %k0, %eax
 ; KNL-NEXT:    # kill: def $al killed $al killed $eax
 ; KNL-NEXT:    vzeroupper
@@ -599,14 +587,9 @@ define i64 @kshiftr_v64i1_63(<64 x i8> %
 define i8 @kshiftl_v8i1_zu123u56(<8 x i64> %x, <8 x i64> %y) {
 ; KNL-LABEL: kshiftl_v8i1_zu123u56:
 ; KNL:       # %bb.0:
-; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k1
-; KNL-NEXT:    vpxor %xmm0, %xmm0, %xmm0
-; KNL-NEXT:    vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; KNL-NEXT:    vmovdqa64 {{.*#+}} zmm3 = <8,u,1,2,3,u,5,6>
-; KNL-NEXT:    vpermi2q %zmm0, %zmm2, %zmm3
-; KNL-NEXT:    vpsllq $63, %zmm3, %zmm0
-; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k1
-; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
+; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
+; KNL-NEXT:    kshiftlw $1, %k0, %k1
+; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k0 {%k1}
 ; KNL-NEXT:    kmovw %k0, %eax
 ; KNL-NEXT:    # kill: def $al killed $al killed $eax
 ; KNL-NEXT:    vzeroupper
@@ -632,12 +615,9 @@ define i8 @kshiftl_v8i1_zu123u56(<8 x i6
 define i8 @kshiftl_v8i1_u0123456(<8 x i64> %x, <8 x i64> %y) {
 ; KNL-LABEL: kshiftl_v8i1_u0123456:
 ; KNL:       # %bb.0:
-; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k1
-; KNL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; KNL-NEXT:    valignq {{.*#+}} zmm0 = zmm0[7,0,1,2,3,4,5,6]
-; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
-; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k1
-; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
+; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
+; KNL-NEXT:    kshiftlw $1, %k0, %k1
+; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k0 {%k1}
 ; KNL-NEXT:    kmovw %k0, %eax
 ; KNL-NEXT:    # kill: def $al killed $al killed $eax
 ; KNL-NEXT:    vzeroupper




More information about the llvm-commits mailing list