[llvm] r290250 - Added a template for building target specific memory node in DAG.

Elena Demikhovsky via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 21 02:43:36 PST 2016


Author: delena
Date: Wed Dec 21 04:43:36 2016
New Revision: 290250

URL: http://llvm.org/viewvc/llvm-project?rev=290250&view=rev
Log:
Added a template for building target specific memory node in DAG.

I added an API for creating a target-specific memory node in the DAG. Today, all memory nodes are common to all targets and their constructors are located in SelectionDAG.cpp.
There are some cases in X86 where we need to create a special node, for example a truncation-with-saturation store or a float-to-half store.
In the current patch I added truncation-with-saturation store nodes and I'm using them for intrinsics. In the future I plan to implement DAG lowering for the truncation-with-saturation pattern.

Differential Revision: https://reviews.llvm.org/D27899


Modified:
    llvm/trunk/include/llvm/CodeGen/SelectionDAG.h
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.h
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
    llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
    llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
    llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
    llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll
    llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll

Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAG.h?rev=290250&r1=290249&r2=290250&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/SelectionDAG.h (original)
+++ llvm/trunk/include/llvm/CodeGen/SelectionDAG.h Wed Dec 21 04:43:36 2016
@@ -964,6 +964,14 @@ public:
                           ArrayRef<SDValue> Ops, MachineMemOperand *MMO);
   SDValue getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
                            ArrayRef<SDValue> Ops, MachineMemOperand *MMO);
+
+  /// Return a target-specific memory node, creating a new one or reusing an
+  /// existing one. TargetMemSDNode must be a class derived from MemSDNode.
+  template <class TargetMemSDNode>
+  SDValue getTargetMemSDNode(SDVTList VTs, ArrayRef<SDValue> Ops,
+                             const SDLoc &dl, EVT MemVT,
+                             MachineMemOperand *MMO);
+
   /// Construct a node to track a Value* through the backend.
   SDValue getSrcValue(const Value *v);
 
@@ -1418,6 +1426,42 @@ template <> struct GraphTraits<Selection
   }
 };
 
+template <class TargetMemSDNode>
+SDValue SelectionDAG::getTargetMemSDNode(SDVTList VTs,
+                                         ArrayRef<SDValue> Ops,
+                                         const SDLoc &dl, EVT MemVT,
+                                         MachineMemOperand *MMO) {
+
+  /// Compose node ID and try to find an existing node.
+  FoldingSetNodeID ID;
+  unsigned Opcode =
+    TargetMemSDNode(dl.getIROrder(), DebugLoc(), VTs, MemVT, MMO).getOpcode();
+  ID.AddInteger(Opcode);
+  ID.AddPointer(VTs.VTs);
+  for (auto& Op : Ops) {
+    ID.AddPointer(Op.getNode());
+    ID.AddInteger(Op.getResNo());
+  }
+  ID.AddInteger(MemVT.getRawBits());
+  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+  ID.AddInteger(getSyntheticNodeSubclassData<TargetMemSDNode>(
+    dl.getIROrder(), VTs, MemVT, MMO));
+
+  void *IP = nullptr;
+  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+    cast<TargetMemSDNode>(E)->refineAlignment(MMO);
+    return SDValue(E, 0);
+  }
+
+  /// Existing node was not found. Create a new one.
+  auto *N = newSDNode<TargetMemSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+                                       MemVT, MMO);
+  createOperands(N, Ops);
+  CSEMap.InsertNode(N, IP);
+  InsertNode(N);
+  return SDValue(N, 0);
+}
+
 }  // end namespace llvm
 
 #endif

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=290250&r1=290249&r2=290250&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Dec 21 04:43:36 2016
@@ -19565,6 +19565,33 @@ static SDValue MarkEHGuard(SDValue Op, S
   return Chain;
 }
 
+/// Emit Truncating Store with signed or unsigned saturation.
+static SDValue
+EmitTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl, SDValue Val,
+                SDValue Ptr, EVT MemVT, MachineMemOperand *MMO,
+                SelectionDAG &DAG) {
+
+  SDVTList VTs = DAG.getVTList(MVT::Other);
+  SDValue Undef = DAG.getUNDEF(Ptr.getValueType());
+  SDValue Ops[] = { Chain, Val, Ptr, Undef };
+  return SignedSat ?
+    DAG.getTargetMemSDNode<TruncSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO) :
+    DAG.getTargetMemSDNode<TruncUSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO);
+}
+
+/// Emit Masked Truncating Store with signed or unsigned saturation.
+static SDValue
+EmitMaskedTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl,
+                      SDValue Val, SDValue Ptr, SDValue Mask, EVT MemVT,
+                      MachineMemOperand *MMO, SelectionDAG &DAG) {
+
+  SDVTList VTs = DAG.getVTList(MVT::Other);
+  SDValue Ops[] = { Chain, Ptr, Mask, Val };
+  return SignedSat ?
+    DAG.getTargetMemSDNode<MaskedTruncSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO) :
+    DAG.getTargetMemSDNode<MaskedTruncUSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO);
+}
+
 static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
                                       SelectionDAG &DAG) {
   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
@@ -19723,18 +19750,39 @@ static SDValue LowerINTRINSIC_W_CHAIN(SD
     MemIntrinsicSDNode *MemIntr = dyn_cast<MemIntrinsicSDNode>(Op);
     assert(MemIntr && "Expected MemIntrinsicSDNode!");
 
-    EVT VT  = MemIntr->getMemoryVT();
+    EVT MemVT  = MemIntr->getMemoryVT();
 
-    if (isAllOnesConstant(Mask)) // return just a truncate store
-      return DAG.getTruncStore(Chain, dl, DataToTruncate, Addr, VT,
-                               MemIntr->getMemOperand());
+    uint16_t TruncationOp = IntrData->Opc0;
+    switch (TruncationOp) {
+    case X86ISD::VTRUNC: {
+      if (isAllOnesConstant(Mask)) // return just a truncate store
+        return DAG.getTruncStore(Chain, dl, DataToTruncate, Addr, MemVT,
+                                 MemIntr->getMemOperand());
+
+      MVT MaskVT = MVT::getVectorVT(MVT::i1, MemVT.getVectorNumElements());
+      SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+
+      return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, VMask, MemVT,
+                                MemIntr->getMemOperand(), true /* truncating */);
+    }
+    case X86ISD::VTRUNCUS:
+    case X86ISD::VTRUNCS: {
+      bool IsSigned = (TruncationOp == X86ISD::VTRUNCS);
+      if (isAllOnesConstant(Mask))
+        return EmitTruncSStore(IsSigned, Chain, dl, DataToTruncate, Addr, MemVT,
+                               MemIntr->getMemOperand(), DAG);
 
-    MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
-    SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+      MVT MaskVT = MVT::getVectorVT(MVT::i1, MemVT.getVectorNumElements());
+      SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
 
-    return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, VMask, VT,
-                              MemIntr->getMemOperand(), true /* truncating */);
+      return EmitMaskedTruncSStore(IsSigned, Chain, dl, DataToTruncate, Addr,
+                                   VMask, MemVT, MemIntr->getMemOperand(), DAG);
+    }
+    default:
+      llvm_unreachable("Unsupported truncstore intrinsic");
+    }
   }
+
   case EXPAND_FROM_MEM: {
     SDValue Mask = Op.getOperand(4);
     SDValue PassThru = Op.getOperand(3);
@@ -23470,6 +23518,10 @@ const char *X86TargetLowering::getTarget
   case X86ISD::VTRUNC:             return "X86ISD::VTRUNC";
   case X86ISD::VTRUNCS:            return "X86ISD::VTRUNCS";
   case X86ISD::VTRUNCUS:           return "X86ISD::VTRUNCUS";
+  case X86ISD::VTRUNCSTORES:       return "X86ISD::VTRUNCSTORES";
+  case X86ISD::VTRUNCSTOREUS:      return "X86ISD::VTRUNCSTOREUS";
+  case X86ISD::VMTRUNCSTORES:      return "X86ISD::VMTRUNCSTORES";
+  case X86ISD::VMTRUNCSTOREUS:     return "X86ISD::VMTRUNCSTOREUS";
   case X86ISD::VINSERT:            return "X86ISD::VINSERT";
   case X86ISD::VFPEXT:             return "X86ISD::VFPEXT";
   case X86ISD::VFPEXT_RND:         return "X86ISD::VFPEXT_RND";

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=290250&r1=290249&r2=290250&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Wed Dec 21 04:43:36 2016
@@ -606,7 +606,12 @@ namespace llvm {
 
       /// This instruction grabs the address of the next argument
       /// from a va_list. (reads and modifies the va_list in memory)
-      VAARG_64
+      VAARG_64,
+
+      // Vector truncating store with unsigned/signed saturation
+      VTRUNCSTOREUS, VTRUNCSTORES,
+      // Vector truncating masked store with unsigned/signed saturation
+      VMTRUNCSTOREUS, VMTRUNCSTORES
 
       // WARNING: Do not add anything in the end unless you want the node to
       // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
@@ -1289,6 +1294,93 @@ namespace llvm {
     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                              const TargetLibraryInfo *libInfo);
   } // end namespace X86
+
+  // Base class for all X86 non-masked store operations.
+  class X86StoreSDNode : public MemSDNode {
+  public:
+    X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
+                   SDVTList VTs, EVT MemVT,
+                   MachineMemOperand *MMO)
+      :MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
+    const SDValue &getValue() const { return getOperand(1); }
+    const SDValue &getBasePtr() const { return getOperand(2); }
+
+    static bool classof(const SDNode *N) {
+      return N->getOpcode() == X86ISD::VTRUNCSTORES ||
+        N->getOpcode() == X86ISD::VTRUNCSTOREUS;
+    }
+  };
+
+  // Base class for all X86 masked store operations.
+  // The class has the same order of operands as MaskedStoreSDNode for
+  // convenience.
+  class X86MaskedStoreSDNode : public MemSDNode {
+  public:
+    X86MaskedStoreSDNode(unsigned Opcode, unsigned Order,
+                         const DebugLoc &dl, SDVTList VTs, EVT MemVT,
+                         MachineMemOperand *MMO)
+      : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
+
+    const SDValue &getBasePtr() const { return getOperand(1); }
+    const SDValue &getMask()    const { return getOperand(2); }
+    const SDValue &getValue()   const { return getOperand(3); }
+
+    static bool classof(const SDNode *N) {
+      return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
+        N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
+    }
+  };
+
+  // X86 Truncating Store with Signed saturation.
+  class TruncSStoreSDNode : public X86StoreSDNode {
+  public:
+    TruncSStoreSDNode(unsigned Order, const DebugLoc &dl,
+                        SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
+      : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
+
+    static bool classof(const SDNode *N) {
+      return N->getOpcode() == X86ISD::VTRUNCSTORES;
+    }
+  };
+
+  // X86 Truncating Store with Unsigned saturation.
+  class TruncUSStoreSDNode : public X86StoreSDNode {
+  public:
+    TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl,
+                      SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
+      : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
+
+    static bool classof(const SDNode *N) {
+      return N->getOpcode() == X86ISD::VTRUNCSTOREUS;
+    }
+  };
+
+  // X86 Truncating Masked Store with Signed saturation.
+  class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode {
+  public:
+    MaskedTruncSStoreSDNode(unsigned Order,
+                         const DebugLoc &dl, SDVTList VTs, EVT MemVT,
+                         MachineMemOperand *MMO)
+      : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
+
+    static bool classof(const SDNode *N) {
+      return N->getOpcode() == X86ISD::VMTRUNCSTORES;
+    }
+  };
+
+  // X86 Truncating Masked Store with Unsigned saturation.
+  class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode {
+  public:
+    MaskedTruncUSStoreSDNode(unsigned Order,
+                            const DebugLoc &dl, SDVTList VTs, EVT MemVT,
+                            MachineMemOperand *MMO)
+      : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
+
+    static bool classof(const SDNode *N) {
+      return N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
+    }
+  };
+
 } // end namespace llvm
 
 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=290250&r1=290249&r2=290250&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Wed Dec 21 04:43:36 2016
@@ -7310,23 +7310,6 @@ multiclass avx512_trunc_mr_lowering<X86V
                             addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
 }
 
-multiclass avx512_trunc_sat_mr_lowering<X86VectorVTInfo SrcInfo,
-                                        X86VectorVTInfo DestInfo, string sat > {
-
-  def: Pat<(!cast<Intrinsic>("int_x86_avx512_mask_pmov"#sat#"_"#SrcInfo.Suffix#
-                               DestInfo.Suffix#"_mem_"#SrcInfo.Size)
-                  addr:$ptr, (SrcInfo.VT SrcInfo.RC:$src), SrcInfo.MRC:$mask),
-           (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mrk) addr:$ptr,
-                    (COPY_TO_REGCLASS SrcInfo.MRC:$mask, SrcInfo.KRCWM),
-                    (SrcInfo.VT SrcInfo.RC:$src))>;
-
-  def: Pat<(!cast<Intrinsic>("int_x86_avx512_mask_pmov"#sat#"_"#SrcInfo.Suffix#
-                               DestInfo.Suffix#"_mem_"#SrcInfo.Size)
-                  addr:$ptr, (SrcInfo.VT SrcInfo.RC:$src), -1),
-           (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mr) addr:$ptr,
-                    (SrcInfo.VT SrcInfo.RC:$src))>;
-}
-
 multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode,
          AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128,
          X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
@@ -7352,119 +7335,89 @@ multiclass avx512_trunc<bits<8> opc, str
                              truncFrag, mtruncFrag>, EVEX_V512;
 }
 
-multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr, SDNode OpNode,
-         AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128,
-         X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
-         X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
-         X86MemOperand x86memopZ, string sat, Predicate prd = HasAVX512>{
-
-  let Predicates = [HasVLX, prd] in {
-    defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info128,
-                             DestInfoZ128, x86memopZ128>,
-                avx512_trunc_sat_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
-                             sat>, EVEX_V128;
-
-    defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info256,
-                             DestInfoZ256, x86memopZ256>,
-                avx512_trunc_sat_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
-                             sat>, EVEX_V256;
-  }
-  let Predicates = [prd] in
-    defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info512,
-                             DestInfoZ, x86memopZ>,
-                avx512_trunc_sat_mr_lowering<VTSrcInfo.info512, DestInfoZ,
-                             sat>, EVEX_V512;
-}
-
-multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                           PatFrag StoreNode, PatFrag MaskedStoreNode> {
   defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                v16i8x_info, v16i8x_info, v16i8x_info, i16mem, i32mem, i64mem,
-               truncstorevi8, masked_truncstorevi8>, EVEX_CD8<8, CD8VO>;
-}
-multiclass avx512_trunc_sat_qb<bits<8> opc, string sat, SDNode OpNode> {
-  defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"qb", OpNode, avx512vl_i64_info,
-               v16i8x_info, v16i8x_info, v16i8x_info, i16mem, i32mem, i64mem,
-               sat>, EVEX_CD8<8, CD8VO>;
+               StoreNode, MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
 }
 
-multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                           PatFrag StoreNode, PatFrag MaskedStoreNode> {
   defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                v8i16x_info, v8i16x_info, v8i16x_info, i32mem, i64mem, i128mem,
-               truncstorevi16, masked_truncstorevi16>, EVEX_CD8<16, CD8VQ>;
-}
-multiclass avx512_trunc_sat_qw<bits<8> opc, string sat, SDNode OpNode> {
-  defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"qw", OpNode, avx512vl_i64_info,
-               v8i16x_info, v8i16x_info, v8i16x_info, i32mem, i64mem, i128mem,
-               sat>, EVEX_CD8<16, CD8VQ>;
+               StoreNode, MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
 }
 
-multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                           PatFrag StoreNode, PatFrag MaskedStoreNode> {
   defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                v4i32x_info, v4i32x_info, v8i32x_info, i64mem, i128mem, i256mem,
-               truncstorevi32, masked_truncstorevi32>, EVEX_CD8<32, CD8VH>;
-}
-multiclass avx512_trunc_sat_qd<bits<8> opc, string sat, SDNode OpNode> {
-  defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"qd", OpNode, avx512vl_i64_info,
-               v4i32x_info, v4i32x_info, v8i32x_info, i64mem, i128mem, i256mem,
-               sat>, EVEX_CD8<32, CD8VH>;
+               StoreNode, MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
 }
 
-multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                           PatFrag StoreNode, PatFrag MaskedStoreNode> {
   defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                v16i8x_info, v16i8x_info, v16i8x_info, i32mem, i64mem, i128mem,
-               truncstorevi8, masked_truncstorevi8>, EVEX_CD8<8, CD8VQ>;
-}
-multiclass avx512_trunc_sat_db<bits<8> opc, string sat, SDNode OpNode> {
-  defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"db", OpNode, avx512vl_i32_info,
-               v16i8x_info, v16i8x_info, v16i8x_info, i32mem, i64mem, i128mem,
-               sat>, EVEX_CD8<8, CD8VQ>;
+               StoreNode, MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
 }
 
-multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                           PatFrag StoreNode, PatFrag MaskedStoreNode> {
   defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i32_info,
               v8i16x_info, v8i16x_info, v16i16x_info, i64mem, i128mem, i256mem,
-              truncstorevi16, masked_truncstorevi16>, EVEX_CD8<16, CD8VH>;
-}
-multiclass avx512_trunc_sat_dw<bits<8> opc, string sat, SDNode OpNode> {
-  defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"dw", OpNode, avx512vl_i32_info,
-              v8i16x_info, v8i16x_info, v16i16x_info, i64mem, i128mem, i256mem,
-              sat>, EVEX_CD8<16, CD8VH>;
+              StoreNode, MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
 }
 
-multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                           PatFrag StoreNode, PatFrag MaskedStoreNode> {
   defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i16_info,
               v16i8x_info, v16i8x_info, v32i8x_info, i64mem, i128mem, i256mem,
-              truncstorevi8, masked_truncstorevi8,HasBWI>, EVEX_CD8<16, CD8VH>;
+              StoreNode, MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
 }
-multiclass avx512_trunc_sat_wb<bits<8> opc, string sat, SDNode OpNode> {
-  defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"wb", OpNode, avx512vl_i16_info,
-              v16i8x_info, v16i8x_info, v32i8x_info, i64mem, i128mem, i256mem,
-              sat, HasBWI>, EVEX_CD8<16, CD8VH>;
-}
-
-defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb", X86vtrunc>;
-defm VPMOVSQB   : avx512_trunc_sat_qb<0x22, "s",   X86vtruncs>;
-defm VPMOVUSQB  : avx512_trunc_sat_qb<0x12, "us",  X86vtruncus>;
-
-defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw", X86vtrunc>;
-defm VPMOVSQW   : avx512_trunc_sat_qw<0x24, "s",   X86vtruncs>;
-defm VPMOVUSQW  : avx512_trunc_sat_qw<0x14, "us",  X86vtruncus>;
-
-defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd", X86vtrunc>;
-defm VPMOVSQD   : avx512_trunc_sat_qd<0x25, "s",   X86vtruncs>;
-defm VPMOVUSQD  : avx512_trunc_sat_qd<0x15, "us",  X86vtruncus>;
-
-defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", X86vtrunc>;
-defm VPMOVSDB   : avx512_trunc_sat_db<0x21, "s",   X86vtruncs>;
-defm VPMOVUSDB  : avx512_trunc_sat_db<0x11, "us",  X86vtruncus>;
-
-defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", X86vtrunc>;
-defm VPMOVSDW   : avx512_trunc_sat_dw<0x23, "s",   X86vtruncs>;
-defm VPMOVUSDW  : avx512_trunc_sat_dw<0x13, "us",  X86vtruncus>;
 
-defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", X86vtrunc>;
-defm VPMOVSWB   : avx512_trunc_sat_wb<0x20, "s",   X86vtruncs>;
-defm VPMOVUSWB  : avx512_trunc_sat_wb<0x10, "us",  X86vtruncus>;
+defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",   X86vtrunc,
+                                  truncstorevi8, masked_truncstorevi8>;
+defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",  X86vtruncs,
+                                  truncstore_s_vi8, masked_truncstore_s_vi8>;
+defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
+                                  truncstore_us_vi8, masked_truncstore_us_vi8>;
+
+defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw",   X86vtrunc,
+                                  truncstorevi16, masked_truncstorevi16>;
+defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw",  X86vtruncs,
+                                  truncstore_s_vi16, masked_truncstore_s_vi16>;
+defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
+                                  truncstore_us_vi16, masked_truncstore_us_vi16>;
+
+defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd",   X86vtrunc,
+                                  truncstorevi32, masked_truncstorevi32>;
+defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd",  X86vtruncs,
+                                  truncstore_s_vi32, masked_truncstore_s_vi32>;
+defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
+                                  truncstore_us_vi32, masked_truncstore_us_vi32>;
+
+defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", X86vtrunc,
+                                  truncstorevi8, masked_truncstorevi8>;
+defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb",   X86vtruncs,
+                                  truncstore_s_vi8, masked_truncstore_s_vi8>;
+defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb",  X86vtruncus,
+                                  truncstore_us_vi8, masked_truncstore_us_vi8>;
+
+defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", X86vtrunc,
+                                  truncstorevi16, masked_truncstorevi16>;
+defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw",   X86vtruncs,
+                                  truncstore_s_vi16, masked_truncstore_s_vi16>;
+defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw",  X86vtruncus,
+                                  truncstore_us_vi16, masked_truncstore_us_vi16>;
+
+defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", X86vtrunc,
+                                  truncstorevi8, masked_truncstorevi8>;
+defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb",   X86vtruncs,
+                                  truncstore_s_vi8, masked_truncstore_s_vi8>;
+defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb",  X86vtruncus,
+                                  truncstore_us_vi8, masked_truncstore_us_vi8>;
 
 let Predicates = [HasAVX512, NoVLX] in {
 def: Pat<(v8i16 (X86vtrunc (v8i32 VR256X:$src))),

Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=290250&r1=290249&r2=290250&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Wed Dec 21 04:43:36 2016
@@ -1021,6 +1021,78 @@ def masked_truncstorevi32 :
   return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
 }]>;
 
+def X86TruncSStore : SDNode<"X86ISD::VTRUNCSTORES",  SDTStore,
+                       [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+def X86TruncUSStore : SDNode<"X86ISD::VTRUNCSTOREUS",  SDTStore,
+                       [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+def X86MTruncSStore : SDNode<"X86ISD::VMTRUNCSTORES",  SDTMaskedStore,
+                       [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+def X86MTruncUSStore : SDNode<"X86ISD::VMTRUNCSTOREUS",  SDTMaskedStore,
+                       [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+def truncstore_s_vi8 : PatFrag<(ops node:$val, node:$ptr),
+                               (X86TruncSStore node:$val, node:$ptr), [{
+  return cast<TruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+
+def truncstore_us_vi8 : PatFrag<(ops node:$val, node:$ptr),
+                               (X86TruncUSStore node:$val, node:$ptr), [{
+  return cast<TruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+
+def truncstore_s_vi16 : PatFrag<(ops node:$val, node:$ptr),
+                               (X86TruncSStore node:$val, node:$ptr), [{
+  return cast<TruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+
+def truncstore_us_vi16 : PatFrag<(ops node:$val, node:$ptr),
+                               (X86TruncUSStore node:$val, node:$ptr), [{
+  return cast<TruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+
+def truncstore_s_vi32 : PatFrag<(ops node:$val, node:$ptr),
+                               (X86TruncSStore node:$val, node:$ptr), [{
+  return cast<TruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+
+def truncstore_us_vi32 : PatFrag<(ops node:$val, node:$ptr),
+                               (X86TruncUSStore node:$val, node:$ptr), [{
+  return cast<TruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+
+def masked_truncstore_s_vi8 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+                     (X86MTruncSStore node:$src1, node:$src2, node:$src3), [{
+  return cast<MaskedTruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+
+def masked_truncstore_us_vi8 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+                               (X86MTruncUSStore node:$src1, node:$src2, node:$src3), [{
+  return cast<MaskedTruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+
+def masked_truncstore_s_vi16 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+                               (X86MTruncSStore node:$src1, node:$src2, node:$src3), [{
+  return cast<MaskedTruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+
+def masked_truncstore_us_vi16 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+                               (X86MTruncUSStore node:$src1, node:$src2, node:$src3), [{
+  return cast<MaskedTruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+
+def masked_truncstore_s_vi32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+                               (X86MTruncSStore node:$src1, node:$src2, node:$src3), [{
+  return cast<MaskedTruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+
+def masked_truncstore_us_vi32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+                               (X86MTruncUSStore node:$src1, node:$src2, node:$src3), [{
+  return cast<MaskedTruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+
 def assertzext_i1 :
   PatFrag<(ops node:$src), (assertzext node:$src), [{
     return cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i1;

Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=290250&r1=290249&r2=290250&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Wed Dec 21 04:43:36 2016
@@ -185,6 +185,79 @@ static const IntrinsicData IntrinsicsWit
                      X86ISD::VTRUNC, 0),
   X86_INTRINSIC_DATA(avx512_mask_pmov_wb_mem_512, TRUNCATE_TO_MEM_VI8,
                      X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_db_mem_128, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_db_mem_256, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_db_mem_512, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_mem_128, TRUNCATE_TO_MEM_VI16,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_mem_256, TRUNCATE_TO_MEM_VI16,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_mem_512, TRUNCATE_TO_MEM_VI16,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_mem_128, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_mem_256, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_mem_512, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_mem_128, TRUNCATE_TO_MEM_VI32,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_mem_256, TRUNCATE_TO_MEM_VI32,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_mem_512, TRUNCATE_TO_MEM_VI32,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_mem_128, TRUNCATE_TO_MEM_VI16,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_mem_256, TRUNCATE_TO_MEM_VI16,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_mem_512, TRUNCATE_TO_MEM_VI16,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_mem_128, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_mem_256, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_mem_512, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_db_mem_128, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_db_mem_256, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_db_mem_512, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_mem_128, TRUNCATE_TO_MEM_VI16,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_mem_256, TRUNCATE_TO_MEM_VI16,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_mem_512, TRUNCATE_TO_MEM_VI16,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_mem_128, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_mem_256, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_mem_512, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_mem_128, TRUNCATE_TO_MEM_VI32,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_mem_256, TRUNCATE_TO_MEM_VI32,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_mem_512, TRUNCATE_TO_MEM_VI32,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_mem_128, TRUNCATE_TO_MEM_VI16,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_mem_256, TRUNCATE_TO_MEM_VI16,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_mem_512, TRUNCATE_TO_MEM_VI16,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_mem_128, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_mem_256, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_mem_512, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNCUS, 0),
+
   X86_INTRINSIC_DATA(avx512_scatter_dpd_512, SCATTER, X86::VSCATTERDPDZmr, 0),
   X86_INTRINSIC_DATA(avx512_scatter_dpi_512, SCATTER, X86::VPSCATTERDDZmr, 0),
   X86_INTRINSIC_DATA(avx512_scatter_dpq_512, SCATTER, X86::VPSCATTERDQZmr, 0),

Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=290250&r1=290249&r2=290250&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Wed Dec 21 04:43:36 2016
@@ -2912,8 +2912,8 @@ declare void @llvm.x86.avx512.mask.pmovs
 define void @test_int_x86_avx512_mask_pmovs_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_512:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovsqb %zmm0, (%rdi)
 ; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vpmovsqb %zmm0, (%rdi)
 ; CHECK-NEXT:    vpmovsqb %zmm0, (%rdi) {%k1}
 ; CHECK-NEXT:    retq
     call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
@@ -2946,8 +2946,8 @@ declare void @llvm.x86.avx512.mask.pmovu
 define void @test_int_x86_avx512_mask_pmovus_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_512:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovusqb %zmm0, (%rdi)
 ; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vpmovusqb %zmm0, (%rdi)
 ; CHECK-NEXT:    vpmovusqb %zmm0, (%rdi) {%k1}
 ; CHECK-NEXT:    retq
     call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
@@ -3014,8 +3014,8 @@ declare void @llvm.x86.avx512.mask.pmovs
 define void @test_int_x86_avx512_mask_pmovs_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_512:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovsqw %zmm0, (%rdi)
 ; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vpmovsqw %zmm0, (%rdi)
 ; CHECK-NEXT:    vpmovsqw %zmm0, (%rdi) {%k1}
 ; CHECK-NEXT:    retq
     call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
@@ -3048,8 +3048,8 @@ declare void @llvm.x86.avx512.mask.pmovu
 define void @test_int_x86_avx512_mask_pmovus_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_512:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovusqw %zmm0, (%rdi)
 ; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vpmovusqw %zmm0, (%rdi)
 ; CHECK-NEXT:    vpmovusqw %zmm0, (%rdi) {%k1}
 ; CHECK-NEXT:    retq
     call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
@@ -3116,8 +3116,8 @@ declare void @llvm.x86.avx512.mask.pmovs
 define void @test_int_x86_avx512_mask_pmovs_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_512:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovsqd %zmm0, (%rdi)
 ; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vpmovsqd %zmm0, (%rdi)
 ; CHECK-NEXT:    vpmovsqd %zmm0, (%rdi) {%k1}
 ; CHECK-NEXT:    retq
     call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
@@ -3150,8 +3150,8 @@ declare void @llvm.x86.avx512.mask.pmovu
 define void @test_int_x86_avx512_mask_pmovus_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_512:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovusqd %zmm0, (%rdi)
 ; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vpmovusqd %zmm0, (%rdi)
 ; CHECK-NEXT:    vpmovusqd %zmm0, (%rdi) {%k1}
 ; CHECK-NEXT:    retq
     call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
@@ -3218,8 +3218,8 @@ declare void @llvm.x86.avx512.mask.pmovs
 define void @test_int_x86_avx512_mask_pmovs_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_512:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovsdb %zmm0, (%rdi)
 ; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vpmovsdb %zmm0, (%rdi)
 ; CHECK-NEXT:    vpmovsdb %zmm0, (%rdi) {%k1}
 ; CHECK-NEXT:    retq
     call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
@@ -3252,8 +3252,8 @@ declare void @llvm.x86.avx512.mask.pmovu
 define void @test_int_x86_avx512_mask_pmovus_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_512:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovusdb %zmm0, (%rdi)
 ; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vpmovusdb %zmm0, (%rdi)
 ; CHECK-NEXT:    vpmovusdb %zmm0, (%rdi) {%k1}
 ; CHECK-NEXT:    retq
     call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
@@ -3320,8 +3320,8 @@ declare void @llvm.x86.avx512.mask.pmovs
 define void @test_int_x86_avx512_mask_pmovs_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_512:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovsdw %zmm0, (%rdi)
 ; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vpmovsdw %zmm0, (%rdi)
 ; CHECK-NEXT:    vpmovsdw %zmm0, (%rdi) {%k1}
 ; CHECK-NEXT:    retq
     call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
@@ -3354,8 +3354,8 @@ declare void @llvm.x86.avx512.mask.pmovu
 define void @test_int_x86_avx512_mask_pmovus_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_512:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovusdw %zmm0, (%rdi)
 ; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vpmovusdw %zmm0, (%rdi)
 ; CHECK-NEXT:    vpmovusdw %zmm0, (%rdi) {%k1}
 ; CHECK-NEXT:    retq
     call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)

Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll?rev=290250&r1=290249&r2=290250&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll Wed Dec 21 04:43:36 2016
@@ -2010,18 +2010,17 @@ declare void @llvm.x86.avx512.mask.pmovs
 define void @test_int_x86_avx512_mask_pmovs_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
 ; AVX512BW:       ## BB#0:
-; AVX512BW-NEXT:    vpmovswb %zmm0, (%rdi)
 ; AVX512BW-NEXT:    kmovd %esi, %k1
+; AVX512BW-NEXT:    vpmovswb %zmm0, (%rdi)
 ; AVX512BW-NEXT:    vpmovswb %zmm0, (%rdi) {%k1}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
 ; AVX512F-32:       # BB#0:
+; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
 ; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; AVX512F-32-NEXT:    vpmovswb %zmm0, (%ecx)
-; AVX512F-32-NEXT:    kmovd %eax, %k1
-; AVX512F-32-NEXT:    vpmovswb %zmm0, (%ecx) {%k1}
+; AVX512F-32-NEXT:    vpmovswb %zmm0, (%eax)
+; AVX512F-32-NEXT:    vpmovswb %zmm0, (%eax) {%k1}
 ; AVX512F-32-NEXT:    retl
     call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
     call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
@@ -2063,18 +2062,17 @@ declare void @llvm.x86.avx512.mask.pmovu
 define void @test_int_x86_avx512_mask_pmovus_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
 ; AVX512BW:       ## BB#0:
-; AVX512BW-NEXT:    vpmovuswb %zmm0, (%rdi)
 ; AVX512BW-NEXT:    kmovd %esi, %k1
+; AVX512BW-NEXT:    vpmovuswb %zmm0, (%rdi)
 ; AVX512BW-NEXT:    vpmovuswb %zmm0, (%rdi) {%k1}
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
 ; AVX512F-32:       # BB#0:
+; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
 ; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; AVX512F-32-NEXT:    vpmovuswb %zmm0, (%ecx)
-; AVX512F-32-NEXT:    kmovd %eax, %k1
-; AVX512F-32-NEXT:    vpmovuswb %zmm0, (%ecx) {%k1}
+; AVX512F-32-NEXT:    vpmovuswb %zmm0, (%eax)
+; AVX512F-32-NEXT:    vpmovuswb %zmm0, (%eax) {%k1}
 ; AVX512F-32-NEXT:    retl
     call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
     call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)

Modified: llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll?rev=290250&r1=290249&r2=290250&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll Wed Dec 21 04:43:36 2016
@@ -4090,8 +4090,8 @@ declare void @llvm.x86.avx512.mask.pmovs
 define void @test_int_x86_avx512_mask_pmovs_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_128:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovswb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x20,0x07]
 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT:    vpmovswb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x20,0x07]
 ; CHECK-NEXT:    vpmovswb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x20,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1)
@@ -4124,8 +4124,8 @@ declare void @llvm.x86.avx512.mask.pmovu
 define void @test_int_x86_avx512_mask_pmovus_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_128:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovuswb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x10,0x07]
 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT:    vpmovuswb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x10,0x07]
 ; CHECK-NEXT:    vpmovuswb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x10,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1)
@@ -4192,8 +4192,8 @@ declare void @llvm.x86.avx512.mask.pmovs
 define void @test_int_x86_avx512_mask_pmovs_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_256:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovswb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x20,0x07]
 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT:    vpmovswb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x20,0x07]
 ; CHECK-NEXT:    vpmovswb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x20,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1)
@@ -4226,8 +4226,8 @@ declare void @llvm.x86.avx512.mask.pmovu
 define void @test_int_x86_avx512_mask_pmovus_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_256:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovuswb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x10,0x07]
 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT:    vpmovuswb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x10,0x07]
 ; CHECK-NEXT:    vpmovuswb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x10,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1)

Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll?rev=290250&r1=290249&r2=290250&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll Wed Dec 21 04:43:36 2016
@@ -2040,8 +2040,8 @@ declare void @llvm.x86.avx512.mask.pmovs
 define void @test_int_x86_avx512_mask_pmovs_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_128:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovsqb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x22,0x07]
 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT:    vpmovsqb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x22,0x07]
 ; CHECK-NEXT:    vpmovsqb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x22,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     call void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
@@ -2074,8 +2074,8 @@ declare void @llvm.x86.avx512.mask.pmovu
 define void @test_int_x86_avx512_mask_pmovus_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_128:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovusqb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x12,0x07]
 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT:    vpmovusqb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x12,0x07]
 ; CHECK-NEXT:    vpmovusqb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x12,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     call void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
@@ -2142,8 +2142,8 @@ declare void @llvm.x86.avx512.mask.pmovs
 define void @test_int_x86_avx512_mask_pmovs_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_256:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovsqb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x22,0x07]
 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT:    vpmovsqb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x22,0x07]
 ; CHECK-NEXT:    vpmovsqb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x22,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     call void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
@@ -2176,8 +2176,8 @@ declare void @llvm.x86.avx512.mask.pmovu
 define void @test_int_x86_avx512_mask_pmovus_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_256:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovusqb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x12,0x07]
 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT:    vpmovusqb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x12,0x07]
 ; CHECK-NEXT:    vpmovusqb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x12,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     call void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
@@ -2244,8 +2244,8 @@ declare void @llvm.x86.avx512.mask.pmovs
 define void @test_int_x86_avx512_mask_pmovs_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_128:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovsqw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x24,0x07]
 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT:    vpmovsqw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x24,0x07]
 ; CHECK-NEXT:    vpmovsqw %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x24,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     call void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
@@ -2278,8 +2278,8 @@ declare void @llvm.x86.avx512.mask.pmovu
 define void @test_int_x86_avx512_mask_pmovus_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_128:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovusqw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x14,0x07]
 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT:    vpmovusqw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x14,0x07]
 ; CHECK-NEXT:    vpmovusqw %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x14,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     call void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
@@ -2346,8 +2346,8 @@ declare void @llvm.x86.avx512.mask.pmovs
 define void @test_int_x86_avx512_mask_pmovs_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_256:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovsqw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x24,0x07]
 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT:    vpmovsqw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x24,0x07]
 ; CHECK-NEXT:    vpmovsqw %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x24,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     call void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
@@ -2380,8 +2380,8 @@ declare void @llvm.x86.avx512.mask.pmovu
 define void @test_int_x86_avx512_mask_pmovus_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_256:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovusqw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x14,0x07]
 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT:    vpmovusqw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x14,0x07]
 ; CHECK-NEXT:    vpmovusqw %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x14,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     call void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
@@ -2448,8 +2448,8 @@ declare void @llvm.x86.avx512.mask.pmovs
 define void @test_int_x86_avx512_mask_pmovs_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_128:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovsqd %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x25,0x07]
 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT:    vpmovsqd %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x25,0x07]
 ; CHECK-NEXT:    vpmovsqd %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x25,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     call void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
@@ -2482,8 +2482,8 @@ declare void @llvm.x86.avx512.mask.pmovu
 define void @test_int_x86_avx512_mask_pmovus_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_128:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovusqd %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x15,0x07]
 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT:    vpmovusqd %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x15,0x07]
 ; CHECK-NEXT:    vpmovusqd %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x15,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     call void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
@@ -2550,8 +2550,8 @@ declare void @llvm.x86.avx512.mask.pmovs
 define void @test_int_x86_avx512_mask_pmovs_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_256:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovsqd %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x25,0x07]
 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT:    vpmovsqd %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x25,0x07]
 ; CHECK-NEXT:    vpmovsqd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x25,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     call void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
@@ -2584,8 +2584,8 @@ declare void @llvm.x86.avx512.mask.pmovu
 define void @test_int_x86_avx512_mask_pmovus_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_256:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovusqd %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x15,0x07]
 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT:    vpmovusqd %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x15,0x07]
 ; CHECK-NEXT:    vpmovusqd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x15,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     call void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
@@ -2652,8 +2652,8 @@ declare void @llvm.x86.avx512.mask.pmovs
 define void @test_int_x86_avx512_mask_pmovs_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_128:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovsdb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x21,0x07]
 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT:    vpmovsdb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x21,0x07]
 ; CHECK-NEXT:    vpmovsdb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x21,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     call void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
@@ -2686,8 +2686,8 @@ declare void @llvm.x86.avx512.mask.pmovu
 define void @test_int_x86_avx512_mask_pmovus_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_128:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovusdb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x11,0x07]
 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT:    vpmovusdb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x11,0x07]
 ; CHECK-NEXT:    vpmovusdb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x11,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     call void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
@@ -2754,8 +2754,8 @@ declare void @llvm.x86.avx512.mask.pmovs
 define void @test_int_x86_avx512_mask_pmovs_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_256:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovsdb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x21,0x07]
 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT:    vpmovsdb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x21,0x07]
 ; CHECK-NEXT:    vpmovsdb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x21,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     call void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
@@ -2788,8 +2788,8 @@ declare void @llvm.x86.avx512.mask.pmovu
 define void @test_int_x86_avx512_mask_pmovus_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_256:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovusdb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x11,0x07]
 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT:    vpmovusdb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x11,0x07]
 ; CHECK-NEXT:    vpmovusdb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x11,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     call void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
@@ -2856,8 +2856,8 @@ declare void @llvm.x86.avx512.mask.pmovs
 define void @test_int_x86_avx512_mask_pmovs_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_128:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovsdw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x23,0x07]
 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT:    vpmovsdw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x23,0x07]
 ; CHECK-NEXT:    vpmovsdw %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x23,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     call void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
@@ -2890,8 +2890,8 @@ declare void @llvm.x86.avx512.mask.pmovu
 define void @test_int_x86_avx512_mask_pmovus_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_128:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovusdw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x13,0x07]
 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT:    vpmovusdw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x13,0x07]
 ; CHECK-NEXT:    vpmovusdw %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x13,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     call void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
@@ -2958,8 +2958,8 @@ declare void @llvm.x86.avx512.mask.pmovs
 define void @test_int_x86_avx512_mask_pmovs_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_256:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovsdw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x23,0x07]
 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT:    vpmovsdw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x23,0x07]
 ; CHECK-NEXT:    vpmovsdw %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x23,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     call void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
@@ -2992,8 +2992,8 @@ declare void @llvm.x86.avx512.mask.pmovu
 define void @test_int_x86_avx512_mask_pmovus_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_256:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovusdw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x13,0x07]
 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT:    vpmovusdw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x13,0x07]
 ; CHECK-NEXT:    vpmovusdw %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x13,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     call void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)




More information about the llvm-commits mailing list