[llvm] r318278 - [X86] Redefine the 128-bit version of VPGATHERQD and VGATHERQPS to use a VK2 mask instead of a VK4 mask.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 14 23:46:43 PST 2017


Author: ctopper
Date: Tue Nov 14 23:46:43 2017
New Revision: 318278

URL: http://llvm.org/viewvc/llvm-project?rev=318278&view=rev
Log:
[X86] Redefine the 128-bit version of VPGATHERQD and VGATHERQPS to use a VK2 mask instead of a VK4 mask.

This allows us to remove extra extend creation during lowering and more accurately reflects the semantics of the instruction.

While there, add an extra output VT to the X86 masked gather node to better match the isel pattern predicate. Currently we're exploiting the fact that the isel table doesn't count how many output results a node actually has, as long as the type of every additional result can be inferred from the first result and the type constraints defined in tablegen. I think we might ultimately want to lower all MGATHER/MSCATTER to an X86ISD node with the extra mask result and stop relying on this hole in the isel checking.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=318278&r1=318277&r2=318278&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Nov 14 23:46:43 2017
@@ -24368,11 +24368,10 @@ static SDValue LowerMGATHER(SDValue Op,
     // The mask should match the destination type. Extending mask with zeroes
     // is not necessary since instruction itself reads only two values from
     // memory.
-    Mask = ExtendToType(Mask, MVT::v4i1, DAG, false);
     SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
     SDValue NewGather = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
-      DAG.getVTList(MVT::v4i32, MVT::Other), Ops, dl, N->getMemoryVT(),
-      N->getMemOperand());
+      DAG.getVTList(MVT::v4i32, MVT::v2i1, MVT::Other), Ops, dl,
+      N->getMemoryVT(), N->getMemOperand());
 
     SDValue Sext = getExtendInVec(X86ISD::VSEXT, dl, MVT::v2i64,
                                   NewGather.getValue(0), DAG);
@@ -24392,16 +24391,16 @@ static SDValue LowerMGATHER(SDValue Op,
         ISD::isBuildVectorAllZeros(Mask.getOperand(1).getNode()) &&
         Index.getOpcode() == ISD::CONCAT_VECTORS &&
         Index.getOperand(1).isUndef()) {
-      Mask = ExtendToType(Mask.getOperand(0), MVT::v4i1, DAG, false);
+      Mask = Mask.getOperand(0);
       Index = Index.getOperand(0);
     } else
       return Op;
     SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
     SDValue NewGather = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
-      DAG.getVTList(MVT::v4f32, MVT::Other), Ops, dl, N->getMemoryVT(),
-      N->getMemOperand());
+      DAG.getVTList(MVT::v4f32, MVT::v2i1, MVT::Other), Ops, dl,
+      N->getMemoryVT(), N->getMemOperand());
 
-    SDValue RetOps[] = { NewGather.getValue(0), NewGather.getValue(1) };
+    SDValue RetOps[] = { NewGather.getValue(0), NewGather.getValue(2) };
     return DAG.getMergeValues(RetOps, dl);
 
   }

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=318278&r1=318277&r2=318278&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Tue Nov 14 23:46:43 2017
@@ -8196,15 +8196,16 @@ defm : AVX512_pmovx_patterns<"VPMOVZX",
 // GATHER - SCATTER Operations
 
 multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
-                         X86MemOperand memop, PatFrag GatherNode> {
+                         X86MemOperand memop, PatFrag GatherNode,
+                         RegisterClass MaskRC = _.KRCWM> {
   let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
       ExeDomain = _.ExeDomain in
-  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, _.KRCWM:$mask_wb),
-            (ins _.RC:$src1, _.KRCWM:$mask, memop:$src2),
+  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
+            (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
             !strconcat(OpcodeStr#_.Suffix,
             "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
-            [(set _.RC:$dst, _.KRCWM:$mask_wb,
-              (GatherNode  (_.VT _.RC:$src1), _.KRCWM:$mask,
+            [(set _.RC:$dst, MaskRC:$mask_wb,
+              (GatherNode  (_.VT _.RC:$src1), MaskRC:$mask,
                      vectoraddr:$src2))]>, EVEX, EVEX_K,
              EVEX_CD8<_.EltSize, CD8VT1>;
 }
@@ -8241,7 +8242,8 @@ let Predicates = [HasVLX] in {
   defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                                           vx128xmem, mgatherv4i32>, EVEX_V128;
   defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
-                                          vx64xmem, X86mgatherv2i64>, EVEX_V128;
+                                          vx64xmem, X86mgatherv2i64, VK2WM>,
+                                          EVEX_V128;
 }
 }
 

Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=318278&r1=318277&r2=318278&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Tue Nov 14 23:46:43 2017
@@ -751,6 +751,15 @@ def memopv4f32 : PatFrag<(ops node:$ptr)
 def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
 def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
 
+// Hack because we can't write a tablegen pattern that requires the type
+// of result 1 to be checked. So explicitly force the mask to v2i1.
+def X86masked_gatherv2i64 : SDNode<"X86ISD::MGATHER",
+                                   SDTypeProfile<2, 3, [SDTCisVec<0>,
+                                                        SDTCisVT<1, v2i1>,
+                                                        SDTCisSameAs<0, 2>,
+                                                        SDTCisSameAs<1, 3>,
+                                                        SDTCisPtrTy<4>]>,
+                             [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
 def X86masked_gather  : SDNode<"X86ISD::MGATHER",  SDTMaskedGather,
                        [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
 
@@ -778,7 +787,7 @@ def mgatherv2i64 : PatFrag<(ops node:$sr
   return false;
 }]>;
 def X86mgatherv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-  (X86masked_gather node:$src1, node:$src2, node:$src3) , [{
+  (X86masked_gatherv2i64 node:$src1, node:$src2, node:$src3) , [{
   if (X86MaskedGatherSDNode *Mgt = dyn_cast<X86MaskedGatherSDNode>(N))
     return (Mgt->getIndex().getValueType() == MVT::v2i64 ||
             Mgt->getBasePtr().getValueType() == MVT::v2i64) &&




More information about the llvm-commits mailing list