[llvm] r368344 - [SelectionDAG][X86] Move setcc mask splitting for mload/mstore/mgather/mscatter from DAGCombiner to the type legalizer.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 8 14:14:09 PDT 2019


Author: ctopper
Date: Thu Aug  8 14:14:08 2019
New Revision: 368344

URL: http://llvm.org/viewvc/llvm-project?rev=368344&view=rev
Log:
[SelectionDAG][X86] Move setcc mask splitting for mload/mstore/mgather/mscatter from DAGCombiner to the type legalizer.

We may be able to look to how VSELECT is handled to further
improve this, but this appears to be neutral or an improvement
on the test cases we have.

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h
    llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
    llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
    llvm/trunk/test/CodeGen/X86/masked_load.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=368344&r1=368343&r2=368344&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Thu Aug  8 14:14:08 2019
@@ -8431,23 +8431,6 @@ SDValue DAGCombiner::visitSELECT(SDNode
   return SDValue();
 }
 
-static
-std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
-  SDLoc DL(N);
-  EVT LoVT, HiVT;
-  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
-
-  // Split the inputs.
-  SDValue Lo, Hi, LL, LH, RL, RH;
-  std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
-  std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
-
-  Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
-  Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
-
-  return std::make_pair(Lo, Hi);
-}
-
 // This function assumes all the vselect's arguments are CONCAT_VECTOR
 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
@@ -8506,7 +8489,6 @@ static SDValue ConvertSelectToConcatVect
 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
   SDValue Mask = MSC->getMask();
-  SDValue Data = MSC->getValue();
   SDValue Chain = MSC->getChain();
   SDLoc DL(N);
 
@@ -8514,124 +8496,19 @@ SDValue DAGCombiner::visitMSCATTER(SDNod
   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
     return Chain;
 
-  if (Level >= AfterLegalizeTypes)
-    return SDValue();
-
-  // If the MSCATTER data type requires splitting and the mask is provided by a
-  // SETCC, then split both nodes and its operands before legalization. This
-  // prevents the type legalizer from unrolling SETCC into scalar comparisons
-  // and enables future optimizations (e.g. min/max pattern matching on X86).
-  if (Mask.getOpcode() != ISD::SETCC)
-    return SDValue();
-
-  // Check if any splitting is required.
-  if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
-      TargetLowering::TypeSplitVector)
-    return SDValue();
-  SDValue MaskLo, MaskHi;
-  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
-
-  EVT LoVT, HiVT;
-  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
-
-  EVT MemoryVT = MSC->getMemoryVT();
-  unsigned Alignment = MSC->getOriginalAlignment();
-
-  EVT LoMemVT, HiMemVT;
-  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
-
-  SDValue DataLo, DataHi;
-  std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
-
-  SDValue Scale = MSC->getScale();
-  SDValue BasePtr = MSC->getBasePtr();
-  SDValue IndexLo, IndexHi;
-  std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
-
-  MachineMemOperand *MMO = DAG.getMachineFunction().
-    getMachineMemOperand(MSC->getPointerInfo(),
-                          MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
-                          Alignment, MSC->getAAInfo(), MSC->getRanges());
-
-  SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };
-  SDValue Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
-                                    DataLo.getValueType(), DL, OpsLo, MMO,
-                                    MSC->getIndexType());
-
-  // The order of the Scatter operation after split is well defined. The "Hi"
-  // part comes after the "Lo". So these two operations should be chained one
-  // after another.
-  SDValue OpsHi[] = { Lo, DataHi, MaskHi, BasePtr, IndexHi, Scale };
-  return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
-                              DL, OpsHi, MMO, MSC->getIndexType());
+  return SDValue();
 }
 
 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
   MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
   SDValue Mask = MST->getMask();
-  SDValue Data = MST->getValue();
   SDValue Chain = MST->getChain();
-  EVT VT = Data.getValueType();
   SDLoc DL(N);
 
   // Zap masked stores with a zero mask.
   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
     return Chain;
 
-  if (Level >= AfterLegalizeTypes)
-    return SDValue();
-
-  // If the MSTORE data type requires splitting and the mask is provided by a
-  // SETCC, then split both nodes and its operands before legalization. This
-  // prevents the type legalizer from unrolling SETCC into scalar comparisons
-  // and enables future optimizations (e.g. min/max pattern matching on X86).
-  if (Mask.getOpcode() == ISD::SETCC) {
-    // Check if any splitting is required.
-    if (TLI.getTypeAction(*DAG.getContext(), VT) !=
-        TargetLowering::TypeSplitVector)
-      return SDValue();
-
-    SDValue MaskLo, MaskHi, Lo, Hi;
-    std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
-
-    SDValue Ptr   = MST->getBasePtr();
-
-    EVT MemoryVT = MST->getMemoryVT();
-    unsigned Alignment = MST->getOriginalAlignment();
-
-    EVT LoMemVT, HiMemVT;
-    std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
-
-    SDValue DataLo, DataHi;
-    std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
-
-    MachineMemOperand *MMO = DAG.getMachineFunction().
-      getMachineMemOperand(MST->getPointerInfo(),
-                           MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
-                           Alignment, MST->getAAInfo(), MST->getRanges());
-
-    Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
-                            MST->isTruncatingStore(),
-                            MST->isCompressingStore());
-
-    Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
-                                     MST->isCompressingStore());
-    unsigned HiOffset = LoMemVT.getStoreSize();
-
-    MMO = DAG.getMachineFunction().getMachineMemOperand(
-        MST->getPointerInfo().getWithOffset(HiOffset),
-        MachineMemOperand::MOStore, HiMemVT.getStoreSize(), Alignment,
-        MST->getAAInfo(), MST->getRanges());
-
-    Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
-                            MST->isTruncatingStore(),
-                            MST->isCompressingStore());
-
-    AddToWorklist(Lo.getNode());
-    AddToWorklist(Hi.getNode());
-
-    return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
-  }
   return SDValue();
 }
 
@@ -8644,76 +8521,7 @@ SDValue DAGCombiner::visitMGATHER(SDNode
   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
     return CombineTo(N, MGT->getPassThru(), MGT->getChain());
 
-  if (Level >= AfterLegalizeTypes)
-    return SDValue();
-
-  // If the MGATHER result requires splitting and the mask is provided by a
-  // SETCC, then split both nodes and its operands before legalization. This
-  // prevents the type legalizer from unrolling SETCC into scalar comparisons
-  // and enables future optimizations (e.g. min/max pattern matching on X86).
-
-  if (Mask.getOpcode() != ISD::SETCC)
-    return SDValue();
-
-  EVT VT = N->getValueType(0);
-
-  // Check if any splitting is required.
-  if (TLI.getTypeAction(*DAG.getContext(), VT) !=
-      TargetLowering::TypeSplitVector)
-    return SDValue();
-
-  SDValue MaskLo, MaskHi, Lo, Hi;
-  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
-
-  SDValue PassThru = MGT->getPassThru();
-  SDValue PassThruLo, PassThruHi;
-  std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
-
-  EVT LoVT, HiVT;
-  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
-
-  SDValue Chain = MGT->getChain();
-  EVT MemoryVT = MGT->getMemoryVT();
-  unsigned Alignment = MGT->getOriginalAlignment();
-
-  EVT LoMemVT, HiMemVT;
-  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
-
-  SDValue Scale = MGT->getScale();
-  SDValue BasePtr = MGT->getBasePtr();
-  SDValue Index = MGT->getIndex();
-  SDValue IndexLo, IndexHi;
-  std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
-
-  MachineMemOperand *MMO = DAG.getMachineFunction().
-    getMachineMemOperand(MGT->getPointerInfo(),
-                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
-                          Alignment, MGT->getAAInfo(), MGT->getRanges());
-
-  SDValue OpsLo[] = { Chain, PassThruLo, MaskLo, BasePtr, IndexLo, Scale };
-  Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
-                           MMO, MGT->getIndexType());
-
-  SDValue OpsHi[] = { Chain, PassThruHi, MaskHi, BasePtr, IndexHi, Scale };
-  Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
-                           MMO, MGT->getIndexType());
-
-  AddToWorklist(Lo.getNode());
-  AddToWorklist(Hi.getNode());
-
-  // Build a factor node to remember that this load is independent of the
-  // other one.
-  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
-                      Hi.getValue(1));
-
-  // Legalized the chain result - switch anything that used the old chain to
-  // use the new one.
-  DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
-
-  SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
-
-  SDValue RetOps[] = { GatherRes, Chain };
-  return DAG.getMergeValues(RetOps, DL);
+  return SDValue();
 }
 
 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
@@ -8725,76 +8533,6 @@ SDValue DAGCombiner::visitMLOAD(SDNode *
   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
     return CombineTo(N, MLD->getPassThru(), MLD->getChain());
 
-  if (Level >= AfterLegalizeTypes)
-    return SDValue();
-
-  // If the MLOAD result requires splitting and the mask is provided by a
-  // SETCC, then split both nodes and its operands before legalization. This
-  // prevents the type legalizer from unrolling SETCC into scalar comparisons
-  // and enables future optimizations (e.g. min/max pattern matching on X86).
-  if (Mask.getOpcode() == ISD::SETCC) {
-    EVT VT = N->getValueType(0);
-
-    // Check if any splitting is required.
-    if (TLI.getTypeAction(*DAG.getContext(), VT) !=
-        TargetLowering::TypeSplitVector)
-      return SDValue();
-
-    SDValue MaskLo, MaskHi, Lo, Hi;
-    std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
-
-    SDValue PassThru = MLD->getPassThru();
-    SDValue PassThruLo, PassThruHi;
-    std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
-
-    EVT LoVT, HiVT;
-    std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
-
-    SDValue Chain = MLD->getChain();
-    SDValue Ptr   = MLD->getBasePtr();
-    EVT MemoryVT = MLD->getMemoryVT();
-    unsigned Alignment = MLD->getOriginalAlignment();
-
-    EVT LoMemVT, HiMemVT;
-    std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
-
-    MachineMemOperand *MMO = DAG.getMachineFunction().
-    getMachineMemOperand(MLD->getPointerInfo(),
-                         MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
-                         Alignment, MLD->getAAInfo(), MLD->getRanges());
-
-    Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, PassThruLo, LoMemVT,
-                           MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
-
-    Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
-                                     MLD->isExpandingLoad());
-    unsigned HiOffset = LoMemVT.getStoreSize();
-
-    MMO = DAG.getMachineFunction().getMachineMemOperand(
-        MLD->getPointerInfo().getWithOffset(HiOffset),
-        MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), Alignment,
-        MLD->getAAInfo(), MLD->getRanges());
-
-    Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT,
-                           MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
-
-    AddToWorklist(Lo.getNode());
-    AddToWorklist(Hi.getNode());
-
-    // Build a factor node to remember that this load is independent of the
-    // other one.
-    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
-                        Hi.getValue(1));
-
-    // Legalized the chain result - switch anything that used the old chain to
-    // use the new one.
-    DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
-
-    SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
-
-    SDValue RetOps[] = { LoadRes, Chain };
-    return DAG.getMergeValues(RetOps, DL);
-  }
   return SDValue();
 }
 

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h?rev=368344&r1=368343&r2=368344&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h Thu Aug  8 14:14:08 2019
@@ -933,6 +933,8 @@ private:
   void SplitRes_SELECT_CC   (SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitRes_UNDEF       (SDNode *N, SDValue &Lo, SDValue &Hi);
 
+  void SplitVSETCC(const SDNode *N);
+
   //===--------------------------------------------------------------------===//
   // Generic Expansion: LegalizeTypesGeneric.cpp
   //===--------------------------------------------------------------------===//

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp?rev=368344&r1=368343&r2=368344&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp Thu Aug  8 14:14:08 2019
@@ -1560,10 +1560,14 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD
 
   // Split Mask operand
   SDValue MaskLo, MaskHi;
-  if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
-    GetSplitVector(Mask, MaskLo, MaskHi);
-  else
-    std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+  if (Mask.getOpcode() == ISD::SETCC) {
+    SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+  } else {
+    if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+      GetSplitVector(Mask, MaskLo, MaskHi);
+    else
+      std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+  }
 
   EVT MemoryVT = MLD->getMemoryVT();
   EVT LoMemVT, HiMemVT;
@@ -1622,10 +1626,14 @@ void DAGTypeLegalizer::SplitVecRes_MGATH
 
   // Split Mask operand
   SDValue MaskLo, MaskHi;
-  if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
-    GetSplitVector(Mask, MaskLo, MaskHi);
-  else
-    std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+  if (Mask.getOpcode() == ISD::SETCC) {
+    SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+  } else {
+    if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+      GetSplitVector(Mask, MaskLo, MaskHi);
+    else
+      std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+  }
 
   EVT MemoryVT = MGT->getMemoryVT();
   EVT LoMemVT, HiMemVT;
@@ -2340,12 +2348,16 @@ SDValue DAGTypeLegalizer::SplitVecOp_MST
   else
     std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
 
+  // Split Mask operand
   SDValue MaskLo, MaskHi;
-  if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
-    // Split Mask operand
-    GetSplitVector(Mask, MaskLo, MaskHi);
-  else
-    std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
+  if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) {
+    SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+  } else {
+    if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+      GetSplitVector(Mask, MaskLo, MaskHi);
+    else
+      std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
+  }
 
   SDValue Lo, Hi;
   MachineMemOperand *MMO = DAG.getMachineFunction().
@@ -2397,12 +2409,16 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSC
   else
     std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
 
+  // Split Mask operand
   SDValue MaskLo, MaskHi;
-  if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
-    // Split Mask operand
-    GetSplitVector(Mask, MaskLo, MaskHi);
-  else
-    std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
+  if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) {
+    SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+  } else {
+    if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+      GetSplitVector(Mask, MaskLo, MaskHi);
+    else
+      std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
+  }
 
   SDValue IndexHi, IndexLo;
   if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)

Modified: llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll?rev=368344&r1=368343&r2=368344&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll Thu Aug  8 14:14:08 2019
@@ -2774,12 +2774,12 @@ define <16 x float> @zext_index(float* %
 define <16 x double> @test_gather_setcc_split(double* %base, <16 x i32> %ind, <16 x i32> %cmp, <16 x double> %passthru) {
 ; KNL_64-LABEL: test_gather_setcc_split:
 ; KNL_64:       # %bb.0:
-; KNL_64-NEXT:    vextractf64x4 $1, %zmm0, %ymm4
-; KNL_64-NEXT:    vextracti64x4 $1, %zmm1, %ymm5
-; KNL_64-NEXT:    vptestnmd %zmm5, %zmm5, %k1
+; KNL_64-NEXT:    vextracti64x4 $1, %zmm1, %ymm4
+; KNL_64-NEXT:    vptestnmd %zmm4, %zmm4, %k1
 ; KNL_64-NEXT:    vptestnmd %zmm1, %zmm1, %k2
 ; KNL_64-NEXT:    vgatherdpd (%rdi,%ymm0,8), %zmm2 {%k2}
-; KNL_64-NEXT:    vgatherdpd (%rdi,%ymm4,8), %zmm3 {%k1}
+; KNL_64-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; KNL_64-NEXT:    vgatherdpd (%rdi,%ymm0,8), %zmm3 {%k1}
 ; KNL_64-NEXT:    vmovapd %zmm2, %zmm0
 ; KNL_64-NEXT:    vmovapd %zmm3, %zmm1
 ; KNL_64-NEXT:    retq
@@ -2795,12 +2795,12 @@ define <16 x double> @test_gather_setcc_
 ; KNL_32-NEXT:    subl $64, %esp
 ; KNL_32-NEXT:    vmovapd 72(%ebp), %zmm3
 ; KNL_32-NEXT:    movl 8(%ebp), %eax
-; KNL_32-NEXT:    vextractf64x4 $1, %zmm0, %ymm4
-; KNL_32-NEXT:    vextracti64x4 $1, %zmm1, %ymm5
-; KNL_32-NEXT:    vptestnmd %zmm5, %zmm5, %k1
+; KNL_32-NEXT:    vextracti64x4 $1, %zmm1, %ymm4
+; KNL_32-NEXT:    vptestnmd %zmm4, %zmm4, %k1
 ; KNL_32-NEXT:    vptestnmd %zmm1, %zmm1, %k2
 ; KNL_32-NEXT:    vgatherdpd (%eax,%ymm0,8), %zmm2 {%k2}
-; KNL_32-NEXT:    vgatherdpd (%eax,%ymm4,8), %zmm3 {%k1}
+; KNL_32-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; KNL_32-NEXT:    vgatherdpd (%eax,%ymm0,8), %zmm3 {%k1}
 ; KNL_32-NEXT:    vmovapd %zmm2, %zmm0
 ; KNL_32-NEXT:    vmovapd %zmm3, %zmm1
 ; KNL_32-NEXT:    movl %ebp, %esp
@@ -2810,12 +2810,12 @@ define <16 x double> @test_gather_setcc_
 ;
 ; SKX-LABEL: test_gather_setcc_split:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vextractf64x4 $1, %zmm0, %ymm4
-; SKX-NEXT:    vextracti64x4 $1, %zmm1, %ymm5
-; SKX-NEXT:    vptestnmd %ymm5, %ymm5, %k1
+; SKX-NEXT:    vextracti64x4 $1, %zmm1, %ymm4
+; SKX-NEXT:    vptestnmd %ymm4, %ymm4, %k1
 ; SKX-NEXT:    vptestnmd %ymm1, %ymm1, %k2
 ; SKX-NEXT:    vgatherdpd (%rdi,%ymm0,8), %zmm2 {%k2}
-; SKX-NEXT:    vgatherdpd (%rdi,%ymm4,8), %zmm3 {%k1}
+; SKX-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; SKX-NEXT:    vgatherdpd (%rdi,%ymm0,8), %zmm3 {%k1}
 ; SKX-NEXT:    vmovapd %zmm2, %zmm0
 ; SKX-NEXT:    vmovapd %zmm3, %zmm1
 ; SKX-NEXT:    retq
@@ -2831,12 +2831,12 @@ define <16 x double> @test_gather_setcc_
 ; SKX_32-NEXT:    subl $64, %esp
 ; SKX_32-NEXT:    vmovapd 72(%ebp), %zmm3
 ; SKX_32-NEXT:    movl 8(%ebp), %eax
-; SKX_32-NEXT:    vextractf64x4 $1, %zmm0, %ymm4
-; SKX_32-NEXT:    vextracti64x4 $1, %zmm1, %ymm5
-; SKX_32-NEXT:    vptestnmd %ymm5, %ymm5, %k1
+; SKX_32-NEXT:    vextracti64x4 $1, %zmm1, %ymm4
+; SKX_32-NEXT:    vptestnmd %ymm4, %ymm4, %k1
 ; SKX_32-NEXT:    vptestnmd %ymm1, %ymm1, %k2
 ; SKX_32-NEXT:    vgatherdpd (%eax,%ymm0,8), %zmm2 {%k2}
-; SKX_32-NEXT:    vgatherdpd (%eax,%ymm4,8), %zmm3 {%k1}
+; SKX_32-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; SKX_32-NEXT:    vgatherdpd (%eax,%ymm0,8), %zmm3 {%k1}
 ; SKX_32-NEXT:    vmovapd %zmm2, %zmm0
 ; SKX_32-NEXT:    vmovapd %zmm3, %zmm1
 ; SKX_32-NEXT:    movl %ebp, %esp
@@ -2854,12 +2854,12 @@ define <16 x double> @test_gather_setcc_
 define void @test_scatter_setcc_split(double* %base, <16 x i32> %ind, <16 x i32> %cmp, <16 x double> %src0)  {
 ; KNL_64-LABEL: test_scatter_setcc_split:
 ; KNL_64:       # %bb.0:
-; KNL_64-NEXT:    vextractf64x4 $1, %zmm0, %ymm4
-; KNL_64-NEXT:    vextracti64x4 $1, %zmm1, %ymm5
-; KNL_64-NEXT:    vptestnmd %zmm5, %zmm5, %k1
+; KNL_64-NEXT:    vextracti64x4 $1, %zmm1, %ymm4
+; KNL_64-NEXT:    vptestnmd %zmm4, %zmm4, %k1
 ; KNL_64-NEXT:    vptestnmd %zmm1, %zmm1, %k2
 ; KNL_64-NEXT:    vscatterdpd %zmm2, (%rdi,%ymm0,8) {%k2}
-; KNL_64-NEXT:    vscatterdpd %zmm3, (%rdi,%ymm4,8) {%k1}
+; KNL_64-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; KNL_64-NEXT:    vscatterdpd %zmm3, (%rdi,%ymm0,8) {%k1}
 ; KNL_64-NEXT:    vzeroupper
 ; KNL_64-NEXT:    retq
 ;
@@ -2874,12 +2874,12 @@ define void @test_scatter_setcc_split(do
 ; KNL_32-NEXT:    subl $64, %esp
 ; KNL_32-NEXT:    vmovapd 72(%ebp), %zmm3
 ; KNL_32-NEXT:    movl 8(%ebp), %eax
-; KNL_32-NEXT:    vextractf64x4 $1, %zmm0, %ymm4
-; KNL_32-NEXT:    vextracti64x4 $1, %zmm1, %ymm5
-; KNL_32-NEXT:    vptestnmd %zmm5, %zmm5, %k1
+; KNL_32-NEXT:    vextracti64x4 $1, %zmm1, %ymm4
+; KNL_32-NEXT:    vptestnmd %zmm4, %zmm4, %k1
 ; KNL_32-NEXT:    vptestnmd %zmm1, %zmm1, %k2
 ; KNL_32-NEXT:    vscatterdpd %zmm2, (%eax,%ymm0,8) {%k2}
-; KNL_32-NEXT:    vscatterdpd %zmm3, (%eax,%ymm4,8) {%k1}
+; KNL_32-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; KNL_32-NEXT:    vscatterdpd %zmm3, (%eax,%ymm0,8) {%k1}
 ; KNL_32-NEXT:    movl %ebp, %esp
 ; KNL_32-NEXT:    popl %ebp
 ; KNL_32-NEXT:    .cfi_def_cfa %esp, 4
@@ -2888,12 +2888,12 @@ define void @test_scatter_setcc_split(do
 ;
 ; SKX-LABEL: test_scatter_setcc_split:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vextractf64x4 $1, %zmm0, %ymm4
-; SKX-NEXT:    vextracti64x4 $1, %zmm1, %ymm5
-; SKX-NEXT:    vptestnmd %ymm5, %ymm5, %k1
+; SKX-NEXT:    vextracti64x4 $1, %zmm1, %ymm4
+; SKX-NEXT:    vptestnmd %ymm4, %ymm4, %k1
 ; SKX-NEXT:    vptestnmd %ymm1, %ymm1, %k2
 ; SKX-NEXT:    vscatterdpd %zmm2, (%rdi,%ymm0,8) {%k2}
-; SKX-NEXT:    vscatterdpd %zmm3, (%rdi,%ymm4,8) {%k1}
+; SKX-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; SKX-NEXT:    vscatterdpd %zmm3, (%rdi,%ymm0,8) {%k1}
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
 ;
@@ -2908,12 +2908,12 @@ define void @test_scatter_setcc_split(do
 ; SKX_32-NEXT:    subl $64, %esp
 ; SKX_32-NEXT:    vmovapd 72(%ebp), %zmm3
 ; SKX_32-NEXT:    movl 8(%ebp), %eax
-; SKX_32-NEXT:    vextractf64x4 $1, %zmm0, %ymm4
-; SKX_32-NEXT:    vextracti64x4 $1, %zmm1, %ymm5
-; SKX_32-NEXT:    vptestnmd %ymm5, %ymm5, %k1
+; SKX_32-NEXT:    vextracti64x4 $1, %zmm1, %ymm4
+; SKX_32-NEXT:    vptestnmd %ymm4, %ymm4, %k1
 ; SKX_32-NEXT:    vptestnmd %ymm1, %ymm1, %k2
 ; SKX_32-NEXT:    vscatterdpd %zmm2, (%eax,%ymm0,8) {%k2}
-; SKX_32-NEXT:    vscatterdpd %zmm3, (%eax,%ymm4,8) {%k1}
+; SKX_32-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; SKX_32-NEXT:    vscatterdpd %zmm3, (%eax,%ymm0,8) {%k1}
 ; SKX_32-NEXT:    movl %ebp, %esp
 ; SKX_32-NEXT:    popl %ebp
 ; SKX_32-NEXT:    .cfi_def_cfa %esp, 4

Modified: llvm/trunk/test/CodeGen/X86/masked_load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_load.ll?rev=368344&r1=368343&r2=368344&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_load.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_load.ll Thu Aug  8 14:14:08 2019
@@ -467,10 +467,9 @@ define <8 x double> @load_v8f64_v8i16(<8
 ; AVX1-NEXT:    vpmovsxdq %xmm3, %xmm3
 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm5, %ymm3
 ; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm0
-; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm4
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
+; AVX1-NEXT:    vpmovsxwq %xmm0, %xmm4
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; AVX1-NEXT:    vpmovsxwq %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm4, %ymm0
 ; AVX1-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm4
 ; AVX1-NEXT:    vblendvpd %ymm0, %ymm4, %ymm1, %ymm0
@@ -486,8 +485,7 @@ define <8 x double> @load_v8f64_v8i16(<8
 ; AVX2-NEXT:    vpmovsxwd %xmm3, %xmm3
 ; AVX2-NEXT:    vpmovsxdq %xmm3, %ymm3
 ; AVX2-NEXT:    vpcmpeqw %xmm4, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovsxwd %xmm0, %xmm0
-; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
+; AVX2-NEXT:    vpmovsxwq %xmm0, %ymm0
 ; AVX2-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm4
 ; AVX2-NEXT:    vblendvpd %ymm0, %ymm4, %ymm1, %ymm0
 ; AVX2-NEXT:    vmaskmovpd 32(%rdi), %ymm3, %ymm1
@@ -1792,10 +1790,9 @@ define <8 x i64> @load_v8i64_v8i16(<8 x
 ; AVX1-NEXT:    vpmovsxdq %xmm3, %xmm3
 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm5, %ymm3
 ; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm0
-; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm4
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
+; AVX1-NEXT:    vpmovsxwq %xmm0, %xmm4
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; AVX1-NEXT:    vpmovsxwq %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm4, %ymm0
 ; AVX1-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm4
 ; AVX1-NEXT:    vblendvpd %ymm0, %ymm4, %ymm1, %ymm0
@@ -1811,8 +1808,7 @@ define <8 x i64> @load_v8i64_v8i16(<8 x
 ; AVX2-NEXT:    vpmovsxwd %xmm3, %xmm3
 ; AVX2-NEXT:    vpmovsxdq %xmm3, %ymm3
 ; AVX2-NEXT:    vpcmpeqw %xmm4, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovsxwd %xmm0, %xmm0
-; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
+; AVX2-NEXT:    vpmovsxwq %xmm0, %ymm0
 ; AVX2-NEXT:    vpmaskmovq (%rdi), %ymm0, %ymm4
 ; AVX2-NEXT:    vblendvpd %ymm0, %ymm4, %ymm1, %ymm0
 ; AVX2-NEXT:    vpmaskmovq 32(%rdi), %ymm3, %ymm1




More information about the llvm-commits mailing list