[llvm] r320134 - [X86] Always consider inserting a vXi1 vector into the lsbs of a zero vector to be legal during lowering. Add isel patterns to emit shifts.

Fri Dec 8 00:10:59 PST 2017

Author: ctopper
Date: Fri Dec  8 00:10:58 2017
New Revision: 320134

URL: http://llvm.org/viewvc/llvm-project?rev=320134&view=rev
Log:
[X86] Always consider inserting a vXi1 vector into the lsbs of a zero vector to be legal during lowering. Add isel patterns to emit shifts.

Previously we only allowed these through if the subvector came from a compare or test instruction which we would again check for during isel.

With this change we only check for the compare and test instructions during isel and have fallback patterns that emit the shifts if needed.

I noticed that in a lot of cases we don't actually see the compare during lowering and rely on an odd legalization of concat_vectors with a zero vector as the second argument. This keeps the concat_vectors around long enough for a later dag combine to expose the compare then we re-legalize the concat_vectors and catch the compare.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86InstrVecCompiler.td

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=320134&r1=320133&r2=320134&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Dec  8 00:10:58 2017
@@ -5014,7 +5014,12 @@ static SDValue insert1BitVector(SDValue
     return SDValue();
 
   unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
-  if (IdxVal == 0  && Vec.isUndef()) // the operation is legal
+  if (IdxVal == 0 && Vec.isUndef()) // the operation is legal
+    return Op;
+
+  // Inserting into the lsbs of a zero vector is legal. ISel will insert shifts
+  // if necessary.
+  if (IdxVal == 0 && ISD::isBuildVectorAllZeros(Vec.getNode()))
     return Op;
 
   MVT OpVT = Op.getSimpleValueType();
@@ -5026,29 +5031,6 @@ static SDValue insert1BitVector(SDValue
          IdxVal % SubVecVT.getSizeInBits() == 0 &&
          "Unexpected index value in INSERT_SUBVECTOR");
 
-  // There are 3 possible cases:
-  // 1. Subvector should be inserted in the lower part (IdxVal == 0)
-  // 2. Subvector should be inserted in the upper part
-  //    (IdxVal + SubVecNumElems == NumElems)
-  // 3. Subvector should be inserted in the middle (for example v2i1
-  //    to v16i1, index 2)
-
-  // If this node widens - by concatenating zeroes - the type of the result
-  // of a node with instruction that zeroes all upper (irrelevant) bits of the
-  // output register, mark this node as legal to enable replacing them with
-  // the v8i1 version of the previous instruction during instruction selection.
-  // For example, VPCMPEQDZ128rr instruction stores its v4i1 result in a k-reg,
-  // while zeroing all the upper remaining 60 bits of the register. if the
-  // result of such instruction is inserted into an allZeroVector, then we can
-  // safely remove insert_vector (in instruction selection) as the cmp instr
-  // already zeroed the rest of the register.
-  if (ISD::isBuildVectorAllZeros(Vec.getNode()) && IdxVal == 0 &&
-      (isMaskedZeroUpperBitsvXi1(SubVec.getOpcode()) ||
-       (SubVec.getOpcode() == ISD::AND &&
-        (isMaskedZeroUpperBitsvXi1(SubVec.getOperand(0).getOpcode()) ||
-         isMaskedZeroUpperBitsvXi1(SubVec.getOperand(1).getOpcode())))))
-    return Op;
-
   // extend to natively supported kshift
   MVT MinVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
   MVT WideOpVT = OpVT;
@@ -5076,13 +5058,14 @@ static SDValue insert1BitVector(SDValue
   }
 
   if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
+    assert(IdxVal != 0 && "Unexpected index");
     NumElems = WideOpVT.getVectorNumElements();
     unsigned ShiftLeft = NumElems - SubVecNumElems;
     unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
     Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
                       DAG.getConstant(ShiftLeft, dl, MVT::i8));
-    Vec = ShiftRight ? DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
-      DAG.getConstant(ShiftRight, dl, MVT::i8)) : Vec;
+    Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
+                      DAG.getConstant(ShiftRight, dl, MVT::i8));
     return ExtractSubVec(Vec);
   }
 

Modified: llvm/trunk/lib/Target/X86/X86InstrVecCompiler.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrVecCompiler.td?rev=320134&r1=320133&r2=320134&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrVecCompiler.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrVecCompiler.td Fri Dec  8 00:10:58 2017
@@ -495,9 +495,103 @@ let Predicates = [HasBWI, HasVLX] in {
 
 // If the bits are not zero we have to fall back to explicitly zeroing by
 // using shifts.
-let Predicates = [HasAVX512, NoVLX] in {
+let Predicates = [HasAVX512] in {
   def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                      (v8i1 VK8:$mask), (iPTR 0))),
             (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK8:$mask, VK16),
                                     (i8 8)), (i8 8))>;
 }
+
+let Predicates = [HasVLX, NoDQI] in {
+  // TODO can we legalize this out
+  def : Pat<(v4i1 (insert_subvector (v4i1 immAllZerosV),
+                                    (v2i1 VK2:$mask), (iPTR 0))),
+            (COPY_TO_REGCLASS (KSHIFTRWri
+                               (KSHIFTLWri (COPY_TO_REGCLASS VK2:$mask, VK16),
+                                           (i8 14)), (i8 14)), VK4)>;
+
+  // TODO can we legalize these out without dqi?
+  def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
+                                    (v2i1 VK2:$mask), (iPTR 0))),
+            (COPY_TO_REGCLASS (KSHIFTRWri
+                               (KSHIFTLWri (COPY_TO_REGCLASS VK2:$mask, VK16),
+                                           (i8 14)), (i8 14)), VK8)>;
+  def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
+                                    (v4i1 VK4:$mask), (iPTR 0))),
+            (COPY_TO_REGCLASS (KSHIFTRWri
+                               (KSHIFTLWri (COPY_TO_REGCLASS VK4:$mask, VK16),
+                                           (i8 12)), (i8 12)), VK8)>;
+}
+
+let Predicates = [HasVLX, HasDQI] in {
+  // TODO can we legalize this out
+  def : Pat<(v4i1 (insert_subvector (v4i1 immAllZerosV),
+                                    (v2i1 VK2:$mask), (iPTR 0))),
+            (COPY_TO_REGCLASS (KSHIFTRBri
+                               (KSHIFTLBri (COPY_TO_REGCLASS VK2:$mask, VK8),
+                                           (i8 14)), (i8 14)), VK4)>;
+
+  def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
+                                    (v2i1 VK2:$mask), (iPTR 0))),
+            (KSHIFTRBri (KSHIFTLBri (COPY_TO_REGCLASS VK2:$mask, VK8),
+                                    (i8 6)), (i8 6))>;
+  def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
+                                    (v4i1 VK4:$mask), (iPTR 0))),
+            (KSHIFTRBri (KSHIFTLBri (COPY_TO_REGCLASS VK4:$mask, VK8),
+                                    (i8 4)), (i8 4))>;
+}
+
+let Predicates = [HasVLX] in {
+  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
+                                     (v2i1 VK2:$mask), (iPTR 0))),
+            (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK2:$mask, VK16),
+                                    (i8 14)), (i8 14))>;
+  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
+                                     (v4i1 VK4:$mask), (iPTR 0))),
+            (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK4:$mask, VK16),
+                                    (i8 12)), (i8 12))>;
+}
+
+let Predicates = [HasBWI] in {
+  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
+                                     (v8i1 VK8:$mask), (iPTR 0))),
+            (KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK8:$mask, VK32),
+                                    (i8 24)), (i8 24))>;
+  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
+                                     (v16i1 VK16:$mask), (iPTR 0))),
+            (KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK16:$mask, VK32),
+                                    (i8 16)), (i8 16))>;
+
+  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
+                                     (v8i1 VK8:$mask), (iPTR 0))),
+            (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK8:$mask, VK64),
+                                    (i8 56)), (i8 56))>;
+  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
+                                     (v16i1 VK16:$mask), (iPTR 0))),
+            (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK16:$mask, VK64),
+                                    (i8 48)), (i8 48))>;
+  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
+                                     (v32i1 VK32:$mask), (iPTR 0))),
+            (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK32:$mask, VK64),
+                                    (i8 32)), (i8 32))>;
+}
+
+let Predicates = [HasBWI, HasVLX] in {
+  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
+                                     (v2i1 VK2:$mask), (iPTR 0))),
+            (KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK2:$mask, VK32),
+                                    (i8 30)), (i8 30))>;
+  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
+                                     (v4i1 VK4:$mask), (iPTR 0))),
+            (KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK4:$mask, VK32),
+                                    (i8 28)), (i8 28))>;
+
+  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
+                                     (v2i1 VK2:$mask), (iPTR 0))),
+            (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK2:$mask, VK64),
+                                    (i8 62)), (i8 62))>;
+  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
+                                     (v4i1 VK4:$mask), (iPTR 0))),
+            (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK4:$mask, VK64),
+                                    (i8 60)), (i8 60))>;
+}