[llvm] r326375 - [X86] Lower extract_element from k-registers by bitcasting from v16i1 to i16 and extending/truncating.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 28 14:23:55 PST 2018


Author: ctopper
Date: Wed Feb 28 14:23:55 2018
New Revision: 326375

URL: http://llvm.org/viewvc/llvm-project?rev=326375&view=rev
Log:
[X86] Lower extract_element from k-registers by bitcasting from v16i1 to i16 and extending/truncating.

This is equivalent to what instruction selection (isel) was doing anyway, but by canonicalizing earlier we can remove some isel patterns.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
    llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
    llvm/trunk/test/CodeGen/X86/pr33349.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=326375&r1=326374&r2=326375&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Feb 28 14:23:55 2018
@@ -14910,36 +14910,35 @@ static SDValue ExtractBitFromMaskVector(
     return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
   }
 
-  // Canonicalize result type to MVT::i32.
-  if (EltVT != MVT::i32) {
-    SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
-                                  Vec, Idx);
-    return DAG.getAnyExtOrTrunc(Extract, dl, EltVT);
-  }
-
   unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
 
-  // Extracts from element 0 are always allowed.
-  if (IdxVal == 0)
-    return Op;
-
   // If the kshift instructions of the correct width aren't natively supported
   // then we need to promote the vector to the native size to get the correct
   // zeroing behavior.
-  if ((!Subtarget.hasDQI() && (VecVT.getVectorNumElements() == 8)) ||
-      (VecVT.getVectorNumElements() < 8)) {
+  if (VecVT.getVectorNumElements() < 16) {
+    VecVT = MVT::v16i1;
+    Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
+                      DAG.getUNDEF(VecVT), Vec,
+                      DAG.getIntPtrConstant(0, dl));
+  }
+
+  // Extracts from element 0 are always allowed.
+  if (IdxVal != 0) {
+    // Use kshiftr instruction to move to the lower element.
+    Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
+                      DAG.getConstant(IdxVal, dl, MVT::i8));
+  }
+
+  // Shrink to v16i1 since that's always legal.
+  if (VecVT.getVectorNumElements() > 16) {
     VecVT = MVT::v16i1;
-    Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VecVT,
-                      DAG.getUNDEF(VecVT),
-                      Vec,
+    Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VecVT, Vec,
                       DAG.getIntPtrConstant(0, dl));
   }
 
-  // Use kshiftr instruction to move to the lower element.
-  Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
-                    DAG.getConstant(IdxVal, dl, MVT::i8));
-  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Vec,
-                     DAG.getIntPtrConstant(0, dl));
+  // Convert to a bitcast+aext/trunc.
+  MVT CastVT = MVT::getIntegerVT(VecVT.getVectorNumElements());
+  return DAG.getAnyExtOrTrunc(DAG.getBitcast(CastVT, Vec), dl, EltVT);
 }
 
 SDValue

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=326375&r1=326374&r2=326375&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Wed Feb 28 14:23:55 2018
@@ -2859,9 +2859,6 @@ let Predicates = [HasAVX512] in {
     def : Pat<(maskVT (scalar_to_vector GR32:$src)),
               (COPY_TO_REGCLASS GR32:$src, maskRC)>;
 
-    def : Pat<(i32 (X86kextract maskRC:$src, (iPTR 0))),
-              (COPY_TO_REGCLASS maskRC:$src, GR32)>;
-
     def : Pat<(maskVT (scalar_to_vector GR8:$src)),
               (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
   }

Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=326375&r1=326374&r2=326375&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Wed Feb 28 14:23:55 2018
@@ -457,10 +457,6 @@ def X86SubVBroadcast : SDNode<"X86ISD::S
 
 def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
 def X86VBroadcastm : SDNode<"X86ISD::VBROADCASTM", SDTVBroadcastm>;
-def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
-                         SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
-                                              SDTCVecEltisVT<1, i1>,
-                                              SDTCisPtrTy<2>]>>;
 
 def X86Blendi    : SDNode<"X86ISD::BLENDI",   SDTBlend>;
 

Modified: llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll?rev=326375&r1=326374&r2=326375&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll Wed Feb 28 14:23:55 2018
@@ -269,7 +269,7 @@ define i64 @test14(<8 x i64>%a, <8 x i64
 ; SKX-LABEL: test14:
 ; SKX:       ## %bb.0:
 ; SKX-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
-; SKX-NEXT:    kshiftrb $4, %k0, %k0
+; SKX-NEXT:    kshiftrw $4, %k0, %k0
 ; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    testb $1, %al
 ; SKX-NEXT:    cmoveq %rsi, %rdi

Modified: llvm/trunk/test/CodeGen/X86/pr33349.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr33349.ll?rev=326375&r1=326374&r2=326375&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr33349.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr33349.ll Wed Feb 28 14:23:55 2018
@@ -12,32 +12,32 @@ target triple = "x86_64-unknown-linux-gn
 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
 ; KNL-NEXT:    kshiftrw $1, %k0, %k1
 ; KNL-NEXT:    kmovw %k1, %eax
-; KNL-NEXT:    kshiftrw $2, %k0, %k1
-; KNL-NEXT:    kshiftrw $1, %k1, %k2
-; KNL-NEXT:    kmovw %k1, %ecx
 ; KNL-NEXT:    testb $1, %al
 ; KNL-NEXT:    fld1
 ; KNL-NEXT:    fldz
 ; KNL-NEXT:    fld %st(0)
 ; KNL-NEXT:    fcmovne %st(2), %st(0)
-; KNL-NEXT:    testb $1, %cl
+; KNL-NEXT:    kshiftrw $2, %k0, %k1
+; KNL-NEXT:    kshiftrw $1, %k1, %k2
+; KNL-NEXT:    kmovw %k2, %eax
+; KNL-NEXT:    testb $1, %al
 ; KNL-NEXT:    fld %st(1)
 ; KNL-NEXT:    fcmovne %st(3), %st(0)
-; KNL-NEXT:    kmovw %k2, %eax
+; KNL-NEXT:    kmovw %k0, %eax
 ; KNL-NEXT:    testb $1, %al
 ; KNL-NEXT:    fld %st(2)
 ; KNL-NEXT:    fcmovne %st(4), %st(0)
-; KNL-NEXT:    kmovw %k0, %eax
+; KNL-NEXT:    kmovw %k1, %eax
 ; KNL-NEXT:    testb $1, %al
 ; KNL-NEXT:    fxch %st(3)
 ; KNL-NEXT:    fcmovne %st(4), %st(0)
 ; KNL-NEXT:    fstp %st(4)
 ; KNL-NEXT:    fxch %st(3)
+; KNL-NEXT:    fstpt 20(%rdi)
+; KNL-NEXT:    fxch %st(1)
 ; KNL-NEXT:    fstpt (%rdi)
 ; KNL-NEXT:    fxch %st(1)
 ; KNL-NEXT:    fstpt 30(%rdi)
-; KNL-NEXT:    fxch %st(1)
-; KNL-NEXT:    fstpt 20(%rdi)
 ; KNL-NEXT:    fstpt 10(%rdi)
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
@@ -54,11 +54,11 @@ target triple = "x86_64-unknown-linux-gn
 ; SKX-NEXT:    fldz
 ; SKX-NEXT:    fld %st(0)
 ; SKX-NEXT:    fcmovne %st(2), %st(0)
-; SKX-NEXT:    kshiftrw $1, %k0, %k2
-; SKX-NEXT:    kmovd %k2, %eax
+; SKX-NEXT:    kmovd %k1, %eax
 ; SKX-NEXT:    testb $1, %al
 ; SKX-NEXT:    fld %st(1)
 ; SKX-NEXT:    fcmovne %st(3), %st(0)
+; SKX-NEXT:    kshiftrw $1, %k0, %k1
 ; SKX-NEXT:    kmovd %k1, %eax
 ; SKX-NEXT:    testb $1, %al
 ; SKX-NEXT:    fld %st(2)
@@ -71,9 +71,9 @@ target triple = "x86_64-unknown-linux-gn
 ; SKX-NEXT:    fxch %st(3)
 ; SKX-NEXT:    fstpt (%rdi)
 ; SKX-NEXT:    fxch %st(1)
-; SKX-NEXT:    fstpt 20(%rdi)
-; SKX-NEXT:    fxch %st(1)
 ; SKX-NEXT:    fstpt 10(%rdi)
+; SKX-NEXT:    fxch %st(1)
+; SKX-NEXT:    fstpt 20(%rdi)
 ; SKX-NEXT:    fstpt 30(%rdi)
 ; SKX-NEXT:    retq
  bb:




More information about the llvm-commits mailing list