[llvm] 9b5f626 - [SLP]Fix cost of the broadcast buildvector/gather.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 6 09:27:41 PST 2023


Author: Alexey Bataev
Date: 2023-01-06T09:25:05-08:00
New Revision: 9b5f62685ab447ba9d3ea8ac2616e0c76a44d21b

URL: https://github.com/llvm/llvm-project/commit/9b5f62685ab447ba9d3ea8ac2616e0c76a44d21b
DIFF: https://github.com/llvm/llvm-project/commit/9b5f62685ab447ba9d3ea8ac2616e0c76a44d21b.diff

LOG: [SLP]Fix cost of the broadcast buildvector/gather.

Need to include the cost of the initial insertelement to the cost of the
broadcasts. Also, need to adjust the cost of the gather/buildvector if
the element is inserted into poison/undef vector.

Differential Revision: https://reviews.llvm.org/D140498

Added: 
    

Modified: 
    llvm/include/llvm/Analysis/TargetTransformInfo.h
    llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
    llvm/include/llvm/CodeGen/BasicTTIImpl.h
    llvm/lib/Analysis/TargetTransformInfo.cpp
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
    llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
    llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
    llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
    llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h
    llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
    llvm/lib/Target/ARM/ARMTargetTransformInfo.h
    llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
    llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
    llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
    llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
    llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
    llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
    llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
    llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
    llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
    llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
    llvm/lib/Target/X86/X86TargetTransformInfo.cpp
    llvm/lib/Target/X86/X86TargetTransformInfo.h
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Analysis/CostModel/X86/loop_v2-inseltpoison.ll
    llvm/test/Analysis/CostModel/X86/loop_v2.ll
    llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll
    llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
    llvm/test/Analysis/CostModel/X86/vector-insert-inseltpoison.ll
    llvm/test/Analysis/CostModel/X86/vector-insert.ll
    llvm/test/Analysis/CostModel/X86/vshift-ashr-codesize.ll
    llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll
    llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
    llvm/test/Analysis/CostModel/X86/vshift-ashr-latency.ll
    llvm/test/Analysis/CostModel/X86/vshift-ashr-sizelatency.ll
    llvm/test/Analysis/CostModel/X86/vshift-lshr-codesize.ll
    llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll
    llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
    llvm/test/Analysis/CostModel/X86/vshift-lshr-latency.ll
    llvm/test/Analysis/CostModel/X86/vshift-lshr-sizelatency.ll
    llvm/test/Analysis/CostModel/X86/vshift-shl-codesize.ll
    llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll
    llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
    llvm/test/Analysis/CostModel/X86/vshift-shl-latency.ll
    llvm/test/Analysis/CostModel/X86/vshift-shl-sizelatency.ll
    llvm/test/Transforms/SLPVectorizer/X86/cse.ll
    llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll
    llvm/test/Transforms/SLPVectorizer/X86/remark_gather-load-redux-cost.ll
    llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 6200af73842c7..a9cb8717ffa88 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1193,7 +1193,8 @@ class TargetTransformInfo {
   /// case is to provision the cost of vectorization/scalarization in
   /// vectorizer passes.
   InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
-                                     unsigned Index = -1) const;
+                                     unsigned Index = -1, Value *Op0 = nullptr,
+                                     Value *Op1 = nullptr) const;
 
   /// \return The expected cost of vector Insert and Extract.
   /// This is used when instruction is available, and implementation
@@ -1786,7 +1787,8 @@ class TargetTransformInfo::Concept {
                                              TTI::TargetCostKind CostKind,
                                              const Instruction *I) = 0;
   virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
-                                             unsigned Index) = 0;
+                                             unsigned Index, Value *Op0,
+                                             Value *Op1) = 0;
   virtual InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                              unsigned Index) = 0;
 
@@ -2358,9 +2360,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
                                      const Instruction *I) override {
     return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
   }
-  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
-                                     unsigned Index) override {
-    return Impl.getVectorInstrCost(Opcode, Val, Index);
+  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index,
+                                     Value *Op0, Value *Op1) override {
+    return Impl.getVectorInstrCost(Opcode, Val, Index, Op0, Op1);
   }
   InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                      unsigned Index) override {

diff  --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index e81e430f66248..262b42a05d993 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -585,8 +585,8 @@ class TargetTransformInfoImplBase {
     return 1;
   }
 
-  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
-                                     unsigned Index) const {
+  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index,
+                                     Value *Op0, Value *Op1) const {
     return 1;
   }
 

diff  --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index aabb94d82c4bf..f27c6899d757b 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -90,10 +90,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     InstructionCost Cost = 0;
     // Broadcast cost is equal to the cost of extracting the zero'th element
     // plus the cost of inserting it into every element of the result vector.
-    Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, 0);
+    Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, 0,
+                                        nullptr, nullptr);
 
     for (int i = 0, e = VTy->getNumElements(); i < e; ++i) {
-      Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i);
+      Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i,
+                                          nullptr, nullptr);
     }
     return Cost;
   }
@@ -110,8 +112,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     // vector and finally index 3 of second vector and insert them at index
     // <0,1,2,3> of result vector.
     for (int i = 0, e = VTy->getNumElements(); i < e; ++i) {
-      Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i);
-      Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, i);
+      Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i,
+                                          nullptr, nullptr);
+      Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, i,
+                                          nullptr, nullptr);
     }
     return Cost;
   }
@@ -134,9 +138,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     // type.
     for (int i = 0; i != NumSubElts; ++i) {
       Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy,
-                                          i + Index);
-      Cost +=
-          thisT()->getVectorInstrCost(Instruction::InsertElement, SubVTy, i);
+                                          i + Index, nullptr, nullptr);
+      Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, SubVTy, i,
+                                          nullptr, nullptr);
     }
     return Cost;
   }
@@ -158,10 +162,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     // the source type plus the cost of inserting them into the result vector
     // type.
     for (int i = 0; i != NumSubElts; ++i) {
-      Cost +=
-          thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVTy, i);
+      Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVTy,
+                                          i, nullptr, nullptr);
       Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy,
-                                          i + Index);
+                                          i + Index, nullptr, nullptr);
     }
     return Cost;
   }
@@ -212,7 +216,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
                                  FixedVectorType::get(
                                      PointerType::get(VT->getElementType(), 0),
                                      VT->getNumElements()),
-                                 -1)
+                                 -1, nullptr, nullptr)
             : 0;
     InstructionCost LoadCost =
         VT->getNumElements() *
@@ -237,7 +241,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
                Instruction::ExtractElement,
                FixedVectorType::get(Type::getInt1Ty(DataTy->getContext()),
                                     VT->getNumElements()),
-               -1) +
+               -1, nullptr, nullptr) +
            getCFInstrCost(Instruction::Br, CostKind) +
            getCFInstrCost(Instruction::PHI, CostKind));
     }
@@ -722,9 +726,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
       if (!DemandedElts[i])
         continue;
       if (Insert)
-        Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, Ty, i);
+        Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, Ty, i,
+                                            nullptr, nullptr);
       if (Extract)
-        Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
+        Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, i,
+                                            nullptr, nullptr);
     }
 
     return Cost;
@@ -1123,7 +1129,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
   InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                            VectorType *VecTy, unsigned Index) {
     return thisT()->getVectorInstrCost(Instruction::ExtractElement, VecTy,
-                                       Index) +
+                                       Index, nullptr, nullptr) +
            thisT()->getCastInstrCost(Opcode, Dst, VecTy->getElementType(),
                                      TTI::CastContextHint::None,
                                      TTI::TCK_RecipThroughput);
@@ -1184,14 +1190,20 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     return 1;
   }
 
-  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
-                                     unsigned Index) {
+  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index,
+                                     Value *Op0, Value *Op1) {
     return getRegUsageForType(Val->getScalarType());
   }
 
   InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                      unsigned Index) {
-    return thisT()->getVectorInstrCost(I.getOpcode(), Val, Index);
+    Value *Op0 = nullptr;
+    Value *Op1 = nullptr;
+    if (auto *IE = dyn_cast<InsertElementInst>(&I)) {
+      Op0 = IE->getOperand(0);
+      Op1 = IE->getOperand(1);
+    }
+    return thisT()->getVectorInstrCost(I.getOpcode(), Val, Index, Op0, Op1);
   }
 
   InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
@@ -2246,7 +2258,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     ArithCost +=
         NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty, CostKind);
     return ShuffleCost + ArithCost +
-           thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
+           thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0,
+                                       nullptr, nullptr);
   }
 
   /// Try to calculate the cost of performing strict (in-order) reductions,
@@ -2353,7 +2366,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     // The last min/max should be in vector registers and we counted it above.
     // So just need a single extractelement.
     return ShuffleCost + MinMaxCost +
-           thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
+           thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0,
+                                       nullptr, nullptr);
   }
 
   InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,

diff  --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 7459ce18c3cf8..d03a8cf14172d 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -897,13 +897,13 @@ InstructionCost TargetTransformInfo::getCmpSelInstrCost(
   return Cost;
 }
 
-InstructionCost TargetTransformInfo::getVectorInstrCost(unsigned Opcode,
-                                                        Type *Val,
-                                                        unsigned Index) const {
+InstructionCost TargetTransformInfo::getVectorInstrCost(
+    unsigned Opcode, Type *Val, unsigned Index, Value *Op0, Value *Op1) const {
   // FIXME: Assert that Opcode is either InsertElement or ExtractElement.
   // This is mentioned in the interface description and respected by all
   // callers, but never asserted upon.
-  InstructionCost Cost = TTIImpl->getVectorInstrCost(Opcode, Val, Index);
+  InstructionCost Cost =
+      TTIImpl->getVectorInstrCost(Opcode, Val, Index, Op0, Op1);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }

diff  --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index ae12ae951d756..f5f6c07f766ae 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2034,8 +2034,8 @@ InstructionCost AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode,
 
   // Get the cost for the extract. We compute the cost (if any) for the extend
   // below.
-  InstructionCost Cost =
-      getVectorInstrCost(Instruction::ExtractElement, VecTy, Index);
+  InstructionCost Cost = getVectorInstrCost(Instruction::ExtractElement, VecTy,
+                                            Index, nullptr, nullptr);
 
   // Legalize the types.
   auto VecLT = getTypeLegalizationCost(VecTy);
@@ -2128,7 +2128,8 @@ InstructionCost AArch64TTIImpl::getVectorInstrCostHelper(Type *Val,
 }
 
 InstructionCost AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
-                                                   unsigned Index) {
+                                                   unsigned Index, Value *Op0,
+                                                   Value *Op1) {
   return getVectorInstrCostHelper(Val, Index, false /* HasRealUse */);
 }
 

diff  --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index e309117a885ba..6eaff9566b8cf 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -169,8 +169,8 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
   InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                  const Instruction *I = nullptr);
 
-  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
-                                     unsigned Index);
+  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index,
+                                     Value *Op0, Value *Op1);
   InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                      unsigned Index);
 

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index af72ba2daa2d9..00e6970291bf8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -790,7 +790,8 @@ GCNTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
 }
 
 InstructionCost GCNTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
-                                               unsigned Index) {
+                                               unsigned Index, Value *Op0,
+                                               Value *Op1) {
   switch (Opcode) {
   case Instruction::ExtractElement:
   case Instruction::InsertElement: {
@@ -799,7 +800,7 @@ InstructionCost GCNTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
     if (EltSize < 32) {
       if (EltSize == 16 && Index == 0 && ST->has16BitInsts())
         return 0;
-      return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
+      return BaseT::getVectorInstrCost(Opcode, ValTy, Index, Op0, Op1);
     }
 
     // Extracts are just reads of a subregister, so are free. Inserts are
@@ -810,7 +811,7 @@ InstructionCost GCNTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
     return Index == ~0u ? 2 : 0;
   }
   default:
-    return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
+    return BaseT::getVectorInstrCost(Opcode, ValTy, Index, Op0, Op1);
   }
 }
 

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 347ce87acd264..4a1137dcf2e2c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -162,7 +162,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
 
   using BaseT::getVectorInstrCost;
   InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy,
-                                     unsigned Index);
+                                     unsigned Index, Value *Op0, Value *Op1);
 
   bool isReadRegisterSourceOfDivergence(const IntrinsicInst *ReadReg) const;
   bool isSourceOfDivergence(const Value *V) const;

diff  --git a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
index 365c005b25034..c3dd321a7b9c0 100644
--- a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
@@ -108,14 +108,15 @@ InstructionCost R600TTIImpl::getCFInstrCost(unsigned Opcode,
 }
 
 InstructionCost R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
-                                                unsigned Index) {
+                                                unsigned Index, Value *Op0,
+                                                Value *Op1) {
   switch (Opcode) {
   case Instruction::ExtractElement:
   case Instruction::InsertElement: {
     unsigned EltSize =
         DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType());
     if (EltSize < 32) {
-      return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
+      return BaseT::getVectorInstrCost(Opcode, ValTy, Index, Op0, Op1);
     }
 
     // Extracts are just reads of a subregister, so are free. Inserts are
@@ -126,7 +127,7 @@ InstructionCost R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
     return Index == ~0u ? 2 : 0;
   }
   default:
-    return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
+    return BaseT::getVectorInstrCost(Opcode, ValTy, Index, Op0, Op1);
   }
 }
 

diff  --git a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h
index f1a198fd14e49..9045cc7731894 100644
--- a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h
@@ -62,7 +62,7 @@ class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
                                  const Instruction *I = nullptr);
   using BaseT::getVectorInstrCost;
   InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy,
-                                     unsigned Index);
+                                     unsigned Index, Value *Op0, Value *Op1);
 };
 
 } // end namespace llvm

diff  --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 8eec432a4a668..07786ea827382 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -874,7 +874,8 @@ InstructionCost ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
 }
 
 InstructionCost ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
-                                               unsigned Index) {
+                                               unsigned Index, Value *Op0,
+                                               Value *Op1) {
   // Penalize inserting into an D-subregister. We end up with a three times
   // lower estimated throughput on swift.
   if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement &&
@@ -893,7 +894,7 @@ InstructionCost ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
     if (ValTy->isVectorTy() &&
         ValTy->getScalarSizeInBits() <= 32)
       return std::max<InstructionCost>(
-          BaseT::getVectorInstrCost(Opcode, ValTy, Index), 2U);
+          BaseT::getVectorInstrCost(Opcode, ValTy, Index, Op0, Op1), 2U);
   }
 
   if (ST->hasMVEIntegerOps() && (Opcode == Instruction::InsertElement ||
@@ -906,7 +907,7 @@ InstructionCost ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
     return LT.first * (ValTy->getScalarType()->isIntegerTy() ? 4 : 1);
   }
 
-  return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
+  return BaseT::getVectorInstrCost(Opcode, ValTy, Index, Op0, Op1);
 }
 
 InstructionCost ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,

diff  --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index db96c3da54cf7..6b1e6444c5168 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -240,8 +240,8 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
                                      const Instruction *I = nullptr);
 
   using BaseT::getVectorInstrCost;
-  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
-                                     unsigned Index);
+  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index,
+                                     Value *Op0, Value *Op1);
 
   InstructionCost getAddressComputationCost(Type *Val, ScalarEvolution *SE,
                                             const SCEV *Ptr);

diff  --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index 779577816fb93..6089c865cedfc 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -329,7 +329,8 @@ InstructionCost HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *DstTy,
 }
 
 InstructionCost HexagonTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
-                                                   unsigned Index) {
+                                                   unsigned Index, Value *Op0,
+                                                   Value *Op1) {
   Type *ElemTy = Val->isVectorTy() ? cast<VectorType>(Val)->getElementType()
                                    : Val;
   if (Opcode == Instruction::InsertElement) {
@@ -338,7 +339,8 @@ InstructionCost HexagonTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
     if (ElemTy->isIntegerTy(32))
       return Cost;
     // If it's not a 32-bit value, there will need to be an extract.
-    return Cost + getVectorInstrCost(Instruction::ExtractElement, Val, Index);
+    return Cost + getVectorInstrCost(Instruction::ExtractElement, Val, Index,
+                                     Op0, Op1);
   }
 
   if (Opcode == Instruction::ExtractElement)

diff  --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
index 49d9520b8323e..d41299ff64139 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -154,8 +154,8 @@ class HexagonTTIImpl : public BasicTTIImplBase<HexagonTTIImpl> {
                                    TTI::TargetCostKind CostKind,
                                    const Instruction *I = nullptr);
   using BaseT::getVectorInstrCost;
-  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
-                                     unsigned Index);
+  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index,
+                                     Value *Op0, Value *Op1);
 
   InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                  const Instruction *I = nullptr) {

diff  --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 3b952f11be347..328a70ec43f6e 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -675,7 +675,8 @@ InstructionCost PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
 }
 
 InstructionCost PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
-                                               unsigned Index) {
+                                               unsigned Index, Value *Op0,
+                                               Value *Op1) {
   assert(Val->isVectorTy() && "This must be a vector type");
 
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -685,7 +686,8 @@ InstructionCost PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
   if (!CostFactor.isValid())
     return InstructionCost::getMax();
 
-  InstructionCost Cost = BaseT::getVectorInstrCost(Opcode, Val, Index);
+  InstructionCost Cost =
+      BaseT::getVectorInstrCost(Opcode, Val, Index, Op0, Op1);
   Cost *= CostFactor;
 
   if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
@@ -827,7 +829,8 @@ InstructionCost PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
   if (Src->isVectorTy() && Opcode == Instruction::Store)
     for (int i = 0, e = cast<FixedVectorType>(Src)->getNumElements(); i < e;
          ++i)
-      Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i);
+      Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i, nullptr,
+                                 nullptr);
 
   return Cost;
 }

diff  --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 9db903baf4070..810a7d0d62ef6 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -126,8 +126,8 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
                                      TTI::TargetCostKind CostKind,
                                      const Instruction *I = nullptr);
   using BaseT::getVectorInstrCost;
-  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
-                                     unsigned Index);
+  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index,
+                                     Value *Op0, Value *Op1);
   InstructionCost
   getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                   unsigned AddressSpace, TTI::TargetCostKind CostKind,

diff  --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 02ce1b135f7f2..ed8af25998b04 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1216,12 +1216,13 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
 }
 
 InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
-                                                 unsigned Index) {
+                                                 unsigned Index, Value *Op0,
+                                                 Value *Op1) {
   assert(Val->isVectorTy() && "This must be a vector type");
 
   if (Opcode != Instruction::ExtractElement &&
       Opcode != Instruction::InsertElement)
-    return BaseT::getVectorInstrCost(Opcode, Val, Index);
+    return BaseT::getVectorInstrCost(Opcode, Val, Index, Op0, Op1);
 
   // Legalize the type.
   std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Val);
@@ -1235,7 +1236,7 @@ InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
     return LT.first;
 
   if (!isTypeLegal(Val))
-    return BaseT::getVectorInstrCost(Opcode, Val, Index);
+    return BaseT::getVectorInstrCost(Opcode, Val, Index, Op0, Op1);
 
   // In RVV, we could use vslidedown + vmv.x.s to extract element from vector
   // and vslideup + vmv.s.x to insert element to vector.

diff  --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 80c7ca3564d76..5df266ba35b5c 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -157,8 +157,8 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
                                      const Instruction *I = nullptr);
 
   using BaseT::getVectorInstrCost;
-  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
-                                     unsigned Index);
+  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index,
+                                     Value *Op0, Value *Op1);
 
   InstructionCost getArithmeticInstrCost(
       unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,

diff  --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 5d00e56ae3470..d6736319a4048 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -996,7 +996,8 @@ InstructionCost SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
 }
 
 InstructionCost SystemZTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
-                                                   unsigned Index) {
+                                                   unsigned Index, Value *Op0,
+                                                   Value *Op1) {
   // vlvgp will insert two grs into a vector register, so only count half the
   // number of instructions.
   if (Opcode == Instruction::InsertElement && Val->isIntOrIntVectorTy(64))
@@ -1012,7 +1013,7 @@ InstructionCost SystemZTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
     return Cost;
   }
 
-  return BaseT::getVectorInstrCost(Opcode, Val, Index);
+  return BaseT::getVectorInstrCost(Opcode, Val, Index, Op0, Op1);
 }
 
 // Check if a load may be folded as a memory operand in its user.

diff  --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index 5ac3d8149a1df..33c3778d572c7 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -107,8 +107,8 @@ class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> {
                                      TTI::TargetCostKind CostKind,
                                      const Instruction *I = nullptr);
   using BaseT::getVectorInstrCost;
-  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
-                                     unsigned Index);
+  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index,
+                                     Value *Op0, Value *Op1);
   bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue);
   InstructionCost
   getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,

diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
index 38464627e7422..b94dcd63ad8be 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
@@ -82,9 +82,10 @@ InstructionCost WebAssemblyTTIImpl::getArithmeticInstrCost(
 
 InstructionCost WebAssemblyTTIImpl::getVectorInstrCost(unsigned Opcode,
                                                        Type *Val,
-                                                       unsigned Index) {
+                                                       unsigned Index,
+                                                       Value *Op0, Value *Op1) {
   InstructionCost Cost =
-      BasicTTIImplBase::getVectorInstrCost(Opcode, Val, Index);
+      BasicTTIImplBase::getVectorInstrCost(Opcode, Val, Index, Op0, Op1);
 
   // SIMD128's insert/extract currently only take constant indices.
   if (Index == -1u)

diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
index 7eed7ef44af76..4f54a762042fe 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
@@ -66,8 +66,8 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase<WebAssemblyTTIImpl> {
       ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
       const Instruction *CxtI = nullptr);
   using BaseT::getVectorInstrCost;
-  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
-                                     unsigned Index);
+  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index,
+                                     Value *Op0, Value *Op1);
 
   /// @}
 

diff  --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 7d08a1654be78..5b6c7d86cebe4 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -4257,7 +4257,8 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
 }
 
 InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
-                                               unsigned Index) {
+                                               unsigned Index, Value *Op0,
+                                               Value *Op1) {
   static const CostTblEntry SLMCostTbl[] = {
      { ISD::EXTRACT_VECTOR_ELT,       MVT::i8,      4 },
      { ISD::EXTRACT_VECTOR_ELT,       MVT::i16,     4 },
@@ -4330,6 +4331,14 @@ InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
       }
     }
 
+    MVT MScalarTy = LT.second.getScalarType();
+    auto IsCheapPInsrPExtrInsertPS = [&]() {
+      return (MScalarTy == MVT::i16 && ST->hasSSE2()) ||
+             (MScalarTy.isInteger() && ST->hasSSE41()) ||
+             (MScalarTy == MVT::f32 && ST->hasSSE41() &&
+              Opcode == Instruction::InsertElement);
+    };
+
     if (Index == 0) {
       // Floating point scalars are already located in index #0.
       // Many insertions to #0 can fold away for scalar fp-ops, so let's assume
@@ -4337,6 +4346,20 @@ InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
       if (ScalarType->isFloatingPointTy())
         return RegisterFileMoveCost;
 
+      if (Opcode == Instruction::InsertElement &&
+          isa_and_nonnull<UndefValue>(Op0)) {
+        // Consider the gather cost to be cheap.
+        if (isa_and_nonnull<LoadInst>(Op1))
+          return RegisterFileMoveCost;
+        if (!IsCheapPInsrPExtrInsertPS()) {
+          // mov constant-to-GPR + movd/movq GPR -> XMM.
+          if (isa_and_nonnull<Constant>(Op1) && Op1->getType()->isIntegerTy())
+            return 2 + RegisterFileMoveCost;
+          // Assume movd/movq GPR -> XMM is relatively cheap on all targets.
+          return 1 + RegisterFileMoveCost;
+        }
+      }
+
       // Assume movd/movq XMM -> GPR is relatively cheap on all targets.
       if (ScalarType->isIntegerTy() && Opcode == Instruction::ExtractElement)
         return 1 + RegisterFileMoveCost;
@@ -4344,19 +4367,13 @@ InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
 
     int ISD = TLI->InstructionOpcodeToISD(Opcode);
     assert(ISD && "Unexpected vector opcode");
-    MVT MScalarTy = LT.second.getScalarType();
     if (ST->useSLMArithCosts())
       if (auto *Entry = CostTableLookup(SLMCostTbl, ISD, MScalarTy))
         return Entry->Cost + RegisterFileMoveCost;
 
     // Assume pinsr/pextr XMM <-> GPR is relatively cheap on all targets.
-    if ((MScalarTy == MVT::i16 && ST->hasSSE2()) ||
-        (MScalarTy.isInteger() && ST->hasSSE41()))
-      return 1 + RegisterFileMoveCost;
-
     // Assume insertps is relatively cheap on all targets.
-    if (MScalarTy == MVT::f32 && ST->hasSSE41() &&
-        Opcode == Instruction::InsertElement)
+    if (IsCheapPInsrPExtrInsertPS())
       return 1 + RegisterFileMoveCost;
 
     // For extractions we just need to shuffle the element to index 0, which
@@ -4383,7 +4400,8 @@ InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
   if (Opcode == Instruction::ExtractElement && ScalarType->isPointerTy())
     RegisterFileMoveCost += 1;
 
-  return BaseT::getVectorInstrCost(Opcode, Val, Index) + RegisterFileMoveCost;
+  return BaseT::getVectorInstrCost(Opcode, Val, Index, Op0, Op1) +
+         RegisterFileMoveCost;
 }
 
 InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty,
@@ -5155,7 +5173,8 @@ X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
   }
 
   // Add the final extract element to the cost.
-  return ReductionCost + getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
+  return ReductionCost + getVectorInstrCost(Instruction::ExtractElement, Ty, 0,
+                                            nullptr, nullptr);
 }
 
 InstructionCost X86TTIImpl::getMinMaxCost(Type *Ty, Type *CondTy,
@@ -5455,7 +5474,8 @@ X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
   }
 
   // Add the final extract element to the cost.
-  return MinMaxCost + getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
+  return MinMaxCost + getVectorInstrCost(Instruction::ExtractElement, Ty, 0,
+                                         nullptr, nullptr);
 }
 
 /// Calculate the cost of materializing a 64-bit value. This helper

diff  --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 666789e160dca..c189e503f4e81 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -147,8 +147,8 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
                                      TTI::TargetCostKind CostKind,
                                      const Instruction *I = nullptr);
   using BaseT::getVectorInstrCost;
-  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
-                                     unsigned Index);
+  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index,
+                                     Value *Op0, Value *Op1);
   InstructionCost getScalarizationOverhead(VectorType *Ty,
                                            const APInt &DemandedElts,
                                            bool Insert, bool Extract);

diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e1b52aa2f80e6..8ca422cfab9fd 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6745,9 +6745,24 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
       // broadcast.
       assert(VecTy == FinalVecTy &&
              "No reused scalars expected for broadcast.");
-      return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy,
-                                 /*Mask=*/std::nullopt, CostKind, /*Index=*/0,
-                                 /*SubTp=*/nullptr, /*Args=*/VL[0]);
+      const auto *It =
+          find_if(VL, [](Value *V) { return !isa<UndefValue>(V); });
+      // If all values are undefs - consider cost free.
+      if (It == VL.end())
+        return TTI::TCC_Free;
+      // Add broadcast for non-identity shuffle only.
+      bool NeedShuffle =
+          VL.front() != *It || !all_of(VL.drop_front(), UndefValue::classof);
+      InstructionCost InsertCost =
+          TTI->getVectorInstrCost(Instruction::InsertElement, VecTy,
+                                  /*Index=*/0, PoisonValue::get(VecTy), *It);
+      return InsertCost + (NeedShuffle
+                               ? TTI->getShuffleCost(
+                                     TargetTransformInfo::SK_Broadcast, VecTy,
+                                     /*Mask=*/std::nullopt, CostKind,
+                                     /*Index=*/0,
+                                     /*SubTp=*/nullptr, /*Args=*/VL[0])
+                               : TTI::TCC_Free);
     }
     InstructionCost ReuseShuffleCost = 0;
     if (NeedToShuffleReuses)

diff  --git a/llvm/test/Analysis/CostModel/X86/loop_v2-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/loop_v2-inseltpoison.ll
index 3e0f4c11aadfa..1e96f97f16e98 100644
--- a/llvm/test/Analysis/CostModel/X86/loop_v2-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/loop_v2-inseltpoison.ll
@@ -20,7 +20,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
   %5 = extractelement <2 x i64> %2, i32 1
   %6 = getelementptr inbounds i32, ptr %A, i64 %5
   %7 = load i32, ptr %4, align 4
-  ;CHECK: cost of 1 {{.*}} insert
+  ;CHECK: cost of 0 {{.*}} insert
   %8 = insertelement <2 x i32> poison, i32 %7, i32 0
   %9 = load i32, ptr %6, align 4
   ;CHECK: cost of 1 {{.*}} insert

diff  --git a/llvm/test/Analysis/CostModel/X86/loop_v2.ll b/llvm/test/Analysis/CostModel/X86/loop_v2.ll
index a9cbaaf2fd630..8f67b365ca9b1 100644
--- a/llvm/test/Analysis/CostModel/X86/loop_v2.ll
+++ b/llvm/test/Analysis/CostModel/X86/loop_v2.ll
@@ -20,7 +20,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
   %5 = extractelement <2 x i64> %2, i32 1
   %6 = getelementptr inbounds i32, ptr %A, i64 %5
   %7 = load i32, ptr %4, align 4
-  ;CHECK: cost of 1 {{.*}} insert
+  ;CHECK: cost of 0 {{.*}} insert
   %8 = insertelement <2 x i32> undef, i32 %7, i32 0
   %9 = load i32, ptr %6, align 4
   ;CHECK: cost of 1 {{.*}} insert

diff  --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll
index 381e5b6308125..897344d622d0f 100644
--- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll
@@ -1907,7 +1907,7 @@ define <16 x float> @test_gather_16f32_ra_var_mask(<16 x ptr> %ptrs, <16 x i32>
 
 define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) {
 ; SSE2-LABEL: 'test_gather_16f32_const_mask2'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
@@ -1966,7 +1966,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) {
 
 define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) {
 ; SSE2-LABEL: 'test_scatter_16i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>

diff  --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
index 2fa41968e8073..5f22b2e39f947 100644
--- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
@@ -1907,7 +1907,7 @@ define <16 x float> @test_gather_16f32_ra_var_mask(<16 x ptr> %ptrs, <16 x i32>
 
 define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) {
 ; SSE2-LABEL: 'test_gather_16f32_const_mask2'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
@@ -1966,7 +1966,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) {
 
 define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) {
 ; SSE2-LABEL: 'test_scatter_16i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>

diff  --git a/llvm/test/Analysis/CostModel/X86/vector-insert-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vector-insert-inseltpoison.ll
index 2296b3d5b0c45..e6a4de688186e 100644
--- a/llvm/test/Analysis/CostModel/X86/vector-insert-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/vector-insert-inseltpoison.ll
@@ -382,58 +382,58 @@ define i32 @insert_i64(i32 %arg) {
 define i32 @insert_i32(i32 %arg) {
 ; SSE2-LABEL: 'insert_i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_1 = insertelement <2 x i32> poison, i32 undef, i32 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_3 = insertelement <4 x i32> poison, i32 undef, i32 3
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_3 = insertelement <8 x i32> poison, i32 undef, i32 3
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> poison, i32 undef, i32 7
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_3 = insertelement <16 x i32> poison, i32 undef, i32 3
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_15 = insertelement <16 x i32> poison, i32 undef, i32 15
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'insert_i32'
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_1 = insertelement <2 x i32> poison, i32 undef, i32 1
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_3 = insertelement <4 x i32> poison, i32 undef, i32 3
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_3 = insertelement <8 x i32> poison, i32 undef, i32 3
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> poison, i32 undef, i32 7
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_3 = insertelement <16 x i32> poison, i32 undef, i32 3
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_15 = insertelement <16 x i32> poison, i32 undef, i32 15
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'insert_i32'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_1 = insertelement <2 x i32> poison, i32 undef, i32 1
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_3 = insertelement <4 x i32> poison, i32 undef, i32 3
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_3 = insertelement <8 x i32> poison, i32 undef, i32 3
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> poison, i32 undef, i32 7
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_3 = insertelement <16 x i32> poison, i32 undef, i32 3
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_15 = insertelement <16 x i32> poison, i32 undef, i32 15
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
@@ -664,100 +664,100 @@ define i32 @insert_i16(i32 %arg) {
 define i32 @insert_i8(i32 %arg) {
 ; SSE2-LABEL: 'insert_i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i8_3 = insertelement <4 x i8> poison, i8 undef, i32 3
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_15 = insertelement <64 x i8> poison, i8 undef, i32 15
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_24 = insertelement <64 x i8> poison, i8 undef, i32 24
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_31 = insertelement <64 x i8> poison, i8 undef, i32 31
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_63 = insertelement <64 x i8> poison, i8 undef, i32 63
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'insert_i8'
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i8_3 = insertelement <4 x i8> poison, i8 undef, i32 3
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_15 = insertelement <64 x i8> poison, i8 undef, i32 15
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_24 = insertelement <64 x i8> poison, i8 undef, i32 24
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_31 = insertelement <64 x i8> poison, i8 undef, i32 31
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_63 = insertelement <64 x i8> poison, i8 undef, i32 63
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'insert_i8'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8_3 = insertelement <4 x i8> poison, i8 undef, i32 3
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i8_15 = insertelement <64 x i8> poison, i8 undef, i32 15
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i8_24 = insertelement <64 x i8> poison, i8 undef, i32 24
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i8_31 = insertelement <64 x i8> poison, i8 undef, i32 31
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i8_63 = insertelement <64 x i8> poison, i8 undef, i32 63
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
@@ -971,31 +971,31 @@ define i32 @insert_i1(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
@@ -1004,31 +1004,31 @@ define i32 @insert_i1(i32 %arg) {
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
@@ -1037,31 +1037,31 @@ define i32 @insert_i1(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;

diff  --git a/llvm/test/Analysis/CostModel/X86/vector-insert.ll b/llvm/test/Analysis/CostModel/X86/vector-insert.ll
index d790aefa91640..fd40da9ff8b76 100644
--- a/llvm/test/Analysis/CostModel/X86/vector-insert.ll
+++ b/llvm/test/Analysis/CostModel/X86/vector-insert.ll
@@ -382,58 +382,58 @@ define i32 @insert_i64(i32 %arg) {
 define i32 @insert_i32(i32 %arg) {
 ; SSE2-LABEL: 'insert_i32'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 undef, i32 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_3 = insertelement <4 x i32> undef, i32 undef, i32 3
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_3 = insertelement <8 x i32> undef, i32 undef, i32 3
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> undef, i32 undef, i32 7
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_3 = insertelement <16 x i32> undef, i32 undef, i32 3
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_15 = insertelement <16 x i32> undef, i32 undef, i32 15
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'insert_i32'
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 undef, i32 1
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_3 = insertelement <4 x i32> undef, i32 undef, i32 3
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_3 = insertelement <8 x i32> undef, i32 undef, i32 3
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> undef, i32 undef, i32 7
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_3 = insertelement <16 x i32> undef, i32 undef, i32 3
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_15 = insertelement <16 x i32> undef, i32 undef, i32 15
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'insert_i32'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 undef, i32 1
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_3 = insertelement <4 x i32> undef, i32 undef, i32 3
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_3 = insertelement <8 x i32> undef, i32 undef, i32 3
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> undef, i32 undef, i32 7
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_3 = insertelement <16 x i32> undef, i32 undef, i32 3
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_15 = insertelement <16 x i32> undef, i32 undef, i32 15
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
@@ -664,100 +664,100 @@ define i32 @insert_i16(i32 %arg) {
 define i32 @insert_i8(i32 %arg) {
 ; SSE2-LABEL: 'insert_i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_3 = insertelement <2 x i8> undef, i8 undef, i32 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i8_3 = insertelement <4 x i8> undef, i8 undef, i32 3
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> undef, i8 undef, i32 %arg
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i8_7 = insertelement <8 x i8> undef, i8 undef, i32 7
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_15 = insertelement <64 x i8> undef, i8 undef, i32 15
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_24 = insertelement <64 x i8> undef, i8 undef, i32 24
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_31 = insertelement <64 x i8> undef, i8 undef, i32 31
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_63 = insertelement <64 x i8> undef, i8 undef, i32 63
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'insert_i8'
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_3 = insertelement <2 x i8> undef, i8 undef, i32 1
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i8_3 = insertelement <4 x i8> undef, i8 undef, i32 3
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> undef, i8 undef, i32 %arg
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i8_7 = insertelement <8 x i8> undef, i8 undef, i32 7
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_15 = insertelement <64 x i8> undef, i8 undef, i32 15
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_24 = insertelement <64 x i8> undef, i8 undef, i32 24
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_31 = insertelement <64 x i8> undef, i8 undef, i32 31
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i8_63 = insertelement <64 x i8> undef, i8 undef, i32 63
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'insert_i8'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 undef, i32 %arg
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i8_3 = insertelement <2 x i8> undef, i8 undef, i32 1
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 undef, i32 %arg
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8_3 = insertelement <4 x i8> undef, i8 undef, i32 3
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i8_a = insertelement <8 x i8> undef, i8 undef, i32 %arg
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i8_7 = insertelement <8 x i8> undef, i8 undef, i32 7
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i8_15 = insertelement <64 x i8> undef, i8 undef, i32 15
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i8_24 = insertelement <64 x i8> undef, i8 undef, i32 24
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i8_31 = insertelement <64 x i8> undef, i8 undef, i32 31
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i8_63 = insertelement <64 x i8> undef, i8 undef, i32 63
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
@@ -971,31 +971,31 @@ define i32 @insert_i1(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_2 = insertelement <4 x i1> undef, i1 undef, i32 2
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_7 = insertelement <32 x i1> undef, i1 undef, i32 7
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_8 = insertelement <32 x i1> undef, i1 undef, i32 8
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_15 = insertelement <32 x i1> undef, i1 undef, i32 15
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_24 = insertelement <32 x i1> undef, i1 undef, i32 24
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_31 = insertelement <32 x i1> undef, i1 undef, i32 31
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> undef, i1 undef, i32 %arg
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_7 = insertelement <64 x i1> undef, i1 undef, i32 7
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_8 = insertelement <64 x i1> undef, i1 undef, i32 8
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_15 = insertelement <64 x i1> undef, i1 undef, i32 15
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_24 = insertelement <64 x i1> undef, i1 undef, i32 24
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_31 = insertelement <64 x i1> undef, i1 undef, i32 31
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_32 = insertelement <64 x i1> undef, i1 undef, i32 32
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_48 = insertelement <64 x i1> undef, i1 undef, i32 48
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i1_32 = insertelement <64 x i1> undef, i1 undef, i32 32
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i1_48 = insertelement <64 x i1> undef, i1 undef, i32 48
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_63 = insertelement <64 x i1> undef, i1 undef, i32 63
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
@@ -1004,31 +1004,31 @@ define i32 @insert_i1(i32 %arg) {
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_2 = insertelement <4 x i1> undef, i1 undef, i32 2
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_7 = insertelement <32 x i1> undef, i1 undef, i32 7
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_8 = insertelement <32 x i1> undef, i1 undef, i32 8
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_15 = insertelement <32 x i1> undef, i1 undef, i32 15
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_24 = insertelement <32 x i1> undef, i1 undef, i32 24
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32i1_31 = insertelement <32 x i1> undef, i1 undef, i32 31
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> undef, i1 undef, i32 %arg
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_7 = insertelement <64 x i1> undef, i1 undef, i32 7
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_8 = insertelement <64 x i1> undef, i1 undef, i32 8
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_15 = insertelement <64 x i1> undef, i1 undef, i32 15
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_24 = insertelement <64 x i1> undef, i1 undef, i32 24
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_31 = insertelement <64 x i1> undef, i1 undef, i32 31
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_32 = insertelement <64 x i1> undef, i1 undef, i32 32
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_48 = insertelement <64 x i1> undef, i1 undef, i32 48
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i1_32 = insertelement <64 x i1> undef, i1 undef, i32 32
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i1_48 = insertelement <64 x i1> undef, i1 undef, i32 48
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v64i1_63 = insertelement <64 x i1> undef, i1 undef, i32 63
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
@@ -1037,31 +1037,31 @@ define i32 @insert_i1(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_a = insertelement <4 x i1> undef, i1 undef, i32 %arg
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_2 = insertelement <4 x i1> undef, i1 undef, i32 2
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_a = insertelement <8 x i1> undef, i1 undef, i32 %arg
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> undef, i1 undef, i32 4
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_a = insertelement <16 x i1> undef, i1 undef, i32 %arg
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i1_8 = insertelement <16 x i1> undef, i1 undef, i32 8
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i1_15 = insertelement <16 x i1> undef, i1 undef, i32 15
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_a = insertelement <32 x i1> undef, i1 undef, i32 %arg
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i1_7 = insertelement <32 x i1> undef, i1 undef, i32 7
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i1_8 = insertelement <32 x i1> undef, i1 undef, i32 8
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i1_15 = insertelement <32 x i1> undef, i1 undef, i32 15
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i1_24 = insertelement <32 x i1> undef, i1 undef, i32 24
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i1_31 = insertelement <32 x i1> undef, i1 undef, i32 31
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i1_a = insertelement <64 x i1> undef, i1 undef, i32 %arg
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i1_0 = insertelement <64 x i1> undef, i1 undef, i32 0
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i1_7 = insertelement <64 x i1> undef, i1 undef, i32 7
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i1_8 = insertelement <64 x i1> undef, i1 undef, i32 8
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i1_15 = insertelement <64 x i1> undef, i1 undef, i32 15
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i1_24 = insertelement <64 x i1> undef, i1 undef, i32 24
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i1_31 = insertelement <64 x i1> undef, i1 undef, i32 31
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i1_32 = insertelement <64 x i1> undef, i1 undef, i32 32
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i1_48 = insertelement <64 x i1> undef, i1 undef, i32 48
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i1_32 = insertelement <64 x i1> undef, i1 undef, i32 32
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i1_48 = insertelement <64 x i1> undef, i1 undef, i32 48
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i1_63 = insertelement <64 x i1> undef, i1 undef, i32 63
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-codesize.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-codesize.ll
index 0743368842c71..8e3f38c796e6e 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-codesize.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-codesize.ll
@@ -400,7 +400,7 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
 
 define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v2i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
@@ -437,7 +437,7 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 
 define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v4i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
@@ -486,7 +486,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 
 define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
@@ -534,35 +534,11 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 }
 
 define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
-; SSE2-LABEL: 'splatvar_shift_v4i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v4i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
-;
-; AVX-LABEL: 'splatvar_shift_v4i32'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
-;
-; XOP-LABEL: 'splatvar_shift_v4i32'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
-;
-; AVX512-LABEL: 'splatvar_shift_v4i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+; CHECK-LABEL: 'splatvar_shift_v4i32'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
   %insert = insertelement <4 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
@@ -572,7 +548,7 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
 
 define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
@@ -621,7 +597,7 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 
 define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
@@ -811,7 +787,7 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
 
 define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
@@ -860,7 +836,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 
 define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
@@ -915,7 +891,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 
 define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v64i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll
index 939586ff80957..d4ece3b2a1134 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll
@@ -443,17 +443,11 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
 ;
 
 define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
-; SSE2-LABEL: 'splatvar_shift_v2i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v2i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
+; SSE-LABEL: 'splatvar_shift_v2i64'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v2i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0
@@ -492,17 +486,11 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 }
 
 define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
-; SSE2-LABEL: 'splatvar_shift_v4i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v4i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
+; SSE-LABEL: 'splatvar_shift_v4i64'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v4i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
@@ -541,17 +529,11 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 }
 
 define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
-; SSE2-LABEL: 'splatvar_shift_v8i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v8i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
+; SSE-LABEL: 'splatvar_shift_v8i64'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
@@ -590,17 +572,11 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 }
 
 define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
-; SSE2-LABEL: 'splatvar_shift_v4i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v4i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
+; SSE-LABEL: 'splatvar_shift_v4i32'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v4i32'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0
@@ -627,17 +603,11 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
 }
 
 define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
-; SSE2-LABEL: 'splatvar_shift_v8i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v8i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
+; SSE-LABEL: 'splatvar_shift_v8i32'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
@@ -676,17 +646,11 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 }
 
 define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
-; SSE2-LABEL: 'splatvar_shift_v16i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v16i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
+; SSE-LABEL: 'splatvar_shift_v16i32'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
@@ -879,7 +843,7 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
 
 define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = ashr <16 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
@@ -928,7 +892,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 
 define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
@@ -995,7 +959,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 
 define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v64i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
index fa057ef67cc58..f8fb2d76a778f 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
@@ -443,17 +443,11 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
 ;
 
 define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
-; SSE2-LABEL: 'splatvar_shift_v2i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v2i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
+; SSE-LABEL: 'splatvar_shift_v2i64'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v2i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
@@ -492,17 +486,11 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 }
 
 define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
-; SSE2-LABEL: 'splatvar_shift_v4i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v4i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
+; SSE-LABEL: 'splatvar_shift_v4i64'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v4i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
@@ -541,17 +529,11 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 }
 
 define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
-; SSE2-LABEL: 'splatvar_shift_v8i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v8i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
+; SSE-LABEL: 'splatvar_shift_v8i64'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
@@ -590,17 +572,11 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 }
 
 define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
-; SSE2-LABEL: 'splatvar_shift_v4i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v4i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
+; SSE-LABEL: 'splatvar_shift_v4i32'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v4i32'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
@@ -627,17 +603,11 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
 }
 
 define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
-; SSE2-LABEL: 'splatvar_shift_v8i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v8i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
+; SSE-LABEL: 'splatvar_shift_v8i32'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
@@ -676,17 +646,11 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 }
 
 define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
-; SSE2-LABEL: 'splatvar_shift_v16i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v16i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
+; SSE-LABEL: 'splatvar_shift_v16i32'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
@@ -879,7 +843,7 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
 
 define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %shift = ashr <16 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
@@ -928,7 +892,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 
 define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
@@ -995,7 +959,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 
 define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v64i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-latency.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-latency.ll
index fe9e0edd5a183..af5278e355cfb 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-latency.ll
@@ -408,7 +408,7 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
 
 define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v2i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <2 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
@@ -445,7 +445,7 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 
 define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v4i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <4 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
@@ -494,7 +494,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 
 define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %shift = ashr <8 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
@@ -543,7 +543,7 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 
 define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v4i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
@@ -592,7 +592,7 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
 
 define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
@@ -641,7 +641,7 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 
 define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
@@ -843,7 +843,7 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
 
 define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <16 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
@@ -898,7 +898,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 
 define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
@@ -953,7 +953,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 
 define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v64i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-sizelatency.ll
index 780ec54ca6785..8286b3967af60 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-sizelatency.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-sizelatency.ll
@@ -408,7 +408,7 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
 
 define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v2i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <2 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
@@ -445,7 +445,7 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 
 define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v4i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <4 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
@@ -494,7 +494,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 
 define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %shift = ashr <8 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
@@ -542,35 +542,11 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 }
 
 define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
-; SSE2-LABEL: 'splatvar_shift_v4i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v4i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
-;
-; AVX-LABEL: 'splatvar_shift_v4i32'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
-;
-; XOP-LABEL: 'splatvar_shift_v4i32'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
-;
-; AVX512-LABEL: 'splatvar_shift_v4i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+; CHECK-LABEL: 'splatvar_shift_v4i32'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
   %insert = insertelement <4 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
@@ -580,7 +556,7 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
 
 define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
@@ -629,7 +605,7 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 
 define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
@@ -819,7 +795,7 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
 
 define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %shift = ashr <16 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
@@ -868,7 +844,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 
 define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <32 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
@@ -923,7 +899,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 
 define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v64i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %shift = ashr <64 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-codesize.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-codesize.ll
index f9094c3d61615..7c506af62e817 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-codesize.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-codesize.ll
@@ -407,35 +407,11 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
 ;
 
 define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
-; SSE2-LABEL: 'splatvar_shift_v2i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v2i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
-;
-; AVX-LABEL: 'splatvar_shift_v2i64'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
-;
-; XOP-LABEL: 'splatvar_shift_v2i64'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
-;
-; AVX512-LABEL: 'splatvar_shift_v2i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+; CHECK-LABEL: 'splatvar_shift_v2i64'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
   %insert = insertelement <2 x i64> undef, i64 %b, i32 0
   %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -445,7 +421,7 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 
 define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v4i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
@@ -494,7 +470,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 
 define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
@@ -542,35 +518,11 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 }
 
 define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
-; SSE2-LABEL: 'splatvar_shift_v4i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v4i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
-;
-; AVX-LABEL: 'splatvar_shift_v4i32'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
-;
-; XOP-LABEL: 'splatvar_shift_v4i32'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
-;
-; AVX512-LABEL: 'splatvar_shift_v4i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+; CHECK-LABEL: 'splatvar_shift_v4i32'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
   %insert = insertelement <4 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
@@ -580,7 +532,7 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
 
 define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
@@ -629,7 +581,7 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 
 define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
@@ -819,7 +771,7 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
 
 define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
@@ -862,7 +814,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 
 define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
@@ -911,7 +863,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 
 define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v64i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll
index 1cb3d919ac9c0..855d2b8e88e3a 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll
@@ -447,17 +447,11 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
 ;
 
 define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
-; SSE2-LABEL: 'splatvar_shift_v2i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v2i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
+; SSE-LABEL: 'splatvar_shift_v2i64'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v2i64'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0
@@ -484,17 +478,11 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 }
 
 define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
-; SSE2-LABEL: 'splatvar_shift_v4i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v4i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
+; SSE-LABEL: 'splatvar_shift_v4i64'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v4i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
@@ -533,17 +521,11 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 }
 
 define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
-; SSE2-LABEL: 'splatvar_shift_v8i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v8i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
+; SSE-LABEL: 'splatvar_shift_v8i64'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
@@ -582,17 +564,11 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 }
 
 define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
-; SSE2-LABEL: 'splatvar_shift_v4i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v4i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
+; SSE-LABEL: 'splatvar_shift_v4i32'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v4i32'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0
@@ -619,17 +595,11 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
 }
 
 define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
-; SSE2-LABEL: 'splatvar_shift_v8i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v8i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
+; SSE-LABEL: 'splatvar_shift_v8i32'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
@@ -668,17 +638,11 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 }
 
 define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
-; SSE2-LABEL: 'splatvar_shift_v16i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v16i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
+; SSE-LABEL: 'splatvar_shift_v16i32'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
@@ -871,7 +835,7 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
 
 define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = lshr <16 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
@@ -920,7 +884,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 
 define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
@@ -969,7 +933,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 
 define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v64i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
index a246a42f77bb9..55894dc932957 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
@@ -447,17 +447,11 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
 ;
 
 define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
-; SSE2-LABEL: 'splatvar_shift_v2i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v2i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
+; SSE-LABEL: 'splatvar_shift_v2i64'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v2i64'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
@@ -484,17 +478,11 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 }
 
 define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
-; SSE2-LABEL: 'splatvar_shift_v4i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v4i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
+; SSE-LABEL: 'splatvar_shift_v4i64'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v4i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
@@ -533,17 +521,11 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 }
 
 define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
-; SSE2-LABEL: 'splatvar_shift_v8i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v8i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
+; SSE-LABEL: 'splatvar_shift_v8i64'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
@@ -582,17 +564,11 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 }
 
 define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
-; SSE2-LABEL: 'splatvar_shift_v4i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v4i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
+; SSE-LABEL: 'splatvar_shift_v4i32'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v4i32'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
@@ -619,17 +595,11 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
 }
 
 define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
-; SSE2-LABEL: 'splatvar_shift_v8i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v8i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
+; SSE-LABEL: 'splatvar_shift_v8i32'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
@@ -668,17 +638,11 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 }
 
 define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
-; SSE2-LABEL: 'splatvar_shift_v16i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v16i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
+; SSE-LABEL: 'splatvar_shift_v16i32'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
@@ -871,7 +835,7 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
 
 define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = lshr <16 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
@@ -920,7 +884,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 
 define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
@@ -969,7 +933,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 
 define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v64i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-latency.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-latency.ll
index 791745306972f..92fe253d38e73 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-latency.ll
@@ -431,17 +431,11 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
 ;
 
 define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
-; SSE2-LABEL: 'splatvar_shift_v2i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v2i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+; SSE-LABEL: 'splatvar_shift_v2i64'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v2i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
@@ -480,17 +474,11 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 }
 
 define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
-; SSE2-LABEL: 'splatvar_shift_v4i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v4i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+; SSE-LABEL: 'splatvar_shift_v4i64'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v4i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
@@ -529,17 +517,11 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 }
 
 define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
-; SSE2-LABEL: 'splatvar_shift_v8i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v8i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+; SSE-LABEL: 'splatvar_shift_v8i64'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
@@ -578,17 +560,11 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 }
 
 define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
-; SSE2-LABEL: 'splatvar_shift_v4i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v4i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+; SSE-LABEL: 'splatvar_shift_v4i32'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v4i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
@@ -627,17 +603,11 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
 }
 
 define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
-; SSE2-LABEL: 'splatvar_shift_v8i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v8i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+; SSE-LABEL: 'splatvar_shift_v8i32'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
@@ -676,17 +646,11 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 }
 
 define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
-; SSE2-LABEL: 'splatvar_shift_v16i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v16i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+; SSE-LABEL: 'splatvar_shift_v16i32'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
@@ -879,7 +843,7 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
 
 define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %shift = lshr <16 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
@@ -934,7 +898,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 
 define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
@@ -983,7 +947,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 
 define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v64i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-sizelatency.ll
index 688bdba0d188b..fe8a7d91cc375 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-sizelatency.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-sizelatency.ll
@@ -411,35 +411,11 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
 ;
 
 define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
-; SSE2-LABEL: 'splatvar_shift_v2i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v2i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
-;
-; AVX-LABEL: 'splatvar_shift_v2i64'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
-;
-; XOP-LABEL: 'splatvar_shift_v2i64'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
-;
-; AVX512-LABEL: 'splatvar_shift_v2i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+; CHECK-LABEL: 'splatvar_shift_v2i64'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
   %insert = insertelement <2 x i64> undef, i64 %b, i32 0
   %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -449,7 +425,7 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 
 define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v4i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
@@ -498,7 +474,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 
 define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
@@ -546,35 +522,11 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 }
 
 define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
-; SSE2-LABEL: 'splatvar_shift_v4i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v4i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
-;
-; AVX-LABEL: 'splatvar_shift_v4i32'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
-;
-; XOP-LABEL: 'splatvar_shift_v4i32'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
-;
-; AVX512-LABEL: 'splatvar_shift_v4i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+; CHECK-LABEL: 'splatvar_shift_v4i32'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
   %insert = insertelement <4 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
@@ -584,7 +536,7 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
 
 define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
@@ -633,7 +585,7 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 
 define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
@@ -823,7 +775,7 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
 
 define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = lshr <16 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
@@ -866,7 +818,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 
 define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = lshr <32 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
@@ -915,7 +867,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 
 define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v64i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %shift = lshr <64 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-codesize.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-codesize.ll
index 5a30f643bf211..07229e22873c4 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-shl-codesize.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-codesize.ll
@@ -407,35 +407,11 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
 ;
 
 define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
-; SSE2-LABEL: 'splatvar_shift_v2i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v2i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
-;
-; AVX-LABEL: 'splatvar_shift_v2i64'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
-;
-; XOP-LABEL: 'splatvar_shift_v2i64'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
-;
-; AVX512-LABEL: 'splatvar_shift_v2i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+; CHECK-LABEL: 'splatvar_shift_v2i64'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
   %insert = insertelement <2 x i64> undef, i64 %b, i32 0
   %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -445,7 +421,7 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 
 define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v4i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
@@ -494,7 +470,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 
 define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
@@ -542,35 +518,11 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 }
 
 define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
-; SSE2-LABEL: 'splatvar_shift_v4i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v4i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
-;
-; AVX-LABEL: 'splatvar_shift_v4i32'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
-;
-; XOP-LABEL: 'splatvar_shift_v4i32'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
-;
-; AVX512-LABEL: 'splatvar_shift_v4i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+; CHECK-LABEL: 'splatvar_shift_v4i32'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
   %insert = insertelement <4 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
@@ -580,7 +532,7 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
 
 define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v8i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
@@ -629,7 +581,7 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 
 define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
@@ -819,7 +771,7 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
 
 define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
@@ -868,7 +820,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 
 define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <32 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
@@ -917,7 +869,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 
 define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v64i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = shl <64 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll
index 178b3366cad92..d8737d2365649 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll
@@ -471,17 +471,11 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
 ;
 
 define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
-; SSE2-LABEL: 'splatvar_shift_v2i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v2i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
+; SSE-LABEL: 'splatvar_shift_v2i64'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v2i64'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0
@@ -500,12 +494,6 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
-;
-; SLM-LABEL: 'splatvar_shift_v2i64'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
-; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
   %insert = insertelement <2 x i64> poison, i64 %b, i32 0
   %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer
@@ -514,17 +502,11 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 }
 
 define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
-; SSE2-LABEL: 'splatvar_shift_v4i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v4i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
+; SSE-LABEL: 'splatvar_shift_v4i64'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v4i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
@@ -555,12 +537,6 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
-;
-; SLM-LABEL: 'splatvar_shift_v4i64'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
-; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
   %insert = insertelement <4 x i64> poison, i64 %b, i32 0
   %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
@@ -569,17 +545,11 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 }
 
 define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
-; SSE2-LABEL: 'splatvar_shift_v8i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v8i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
+; SSE-LABEL: 'splatvar_shift_v8i64'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
@@ -610,12 +580,6 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
-;
-; SLM-LABEL: 'splatvar_shift_v8i64'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
-; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
   %insert = insertelement <8 x i64> poison, i64 %b, i32 0
   %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -624,17 +588,11 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 }
 
 define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
-; SSE2-LABEL: 'splatvar_shift_v4i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v4i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
+; SSE-LABEL: 'splatvar_shift_v4i32'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v4i32'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0
@@ -653,12 +611,6 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
-;
-; SLM-LABEL: 'splatvar_shift_v4i32'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
-; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
 ;
   %insert = insertelement <4 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer
@@ -667,17 +619,11 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
 }
 
 define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
-; SSE2-LABEL: 'splatvar_shift_v8i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v8i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
+; SSE-LABEL: 'splatvar_shift_v8i32'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
@@ -708,12 +654,6 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
-;
-; SLM-LABEL: 'splatvar_shift_v8i32'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
-; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
   %insert = insertelement <8 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -722,17 +662,11 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 }
 
 define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
-; SSE2-LABEL: 'splatvar_shift_v16i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v16i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
+; SSE-LABEL: 'splatvar_shift_v16i32'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
@@ -763,12 +697,6 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
-;
-; SLM-LABEL: 'splatvar_shift_v16i32'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat
-; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
   %insert = insertelement <16 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
@@ -949,7 +877,7 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
 
 define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = shl <16 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
@@ -1004,7 +932,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 
 define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = shl <32 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
@@ -1059,7 +987,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 
 define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v64i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %shift = shl <64 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
index b8f9e0683fb87..b254ff27daccb 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
@@ -471,17 +471,11 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
 ;
 
 define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
-; SSE2-LABEL: 'splatvar_shift_v2i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v2i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
+; SSE-LABEL: 'splatvar_shift_v2i64'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v2i64'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
@@ -500,12 +494,6 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
-;
-; SLM-LABEL: 'splatvar_shift_v2i64'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
-; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
   %insert = insertelement <2 x i64> undef, i64 %b, i32 0
   %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -514,17 +502,11 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 }
 
 define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
-; SSE2-LABEL: 'splatvar_shift_v4i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v4i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
+; SSE-LABEL: 'splatvar_shift_v4i64'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v4i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
@@ -555,12 +537,6 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
-;
-; SLM-LABEL: 'splatvar_shift_v4i64'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
-; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
   %insert = insertelement <4 x i64> undef, i64 %b, i32 0
   %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -569,17 +545,11 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 }
 
 define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
-; SSE2-LABEL: 'splatvar_shift_v8i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v8i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
+; SSE-LABEL: 'splatvar_shift_v8i64'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
@@ -610,12 +580,6 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
-;
-; SLM-LABEL: 'splatvar_shift_v8i64'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
-; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
   %insert = insertelement <8 x i64> undef, i64 %b, i32 0
   %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
@@ -624,17 +588,11 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 }
 
 define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
-; SSE2-LABEL: 'splatvar_shift_v4i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v4i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
+; SSE-LABEL: 'splatvar_shift_v4i32'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
 ;
 ; AVX-LABEL: 'splatvar_shift_v4i32'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
@@ -653,12 +611,6 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
-;
-; SLM-LABEL: 'splatvar_shift_v4i32'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
-; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
 ;
   %insert = insertelement <4 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
@@ -667,17 +619,11 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
 }
 
 define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
-; SSE2-LABEL: 'splatvar_shift_v8i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v8i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
+; SSE-LABEL: 'splatvar_shift_v8i32'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
@@ -708,12 +654,6 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
-;
-; SLM-LABEL: 'splatvar_shift_v8i32'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
-; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
   %insert = insertelement <8 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -722,17 +662,11 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 }
 
 define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
-; SSE2-LABEL: 'splatvar_shift_v16i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v16i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
+; SSE-LABEL: 'splatvar_shift_v16i32'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
@@ -763,12 +697,6 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
-;
-; SLM-LABEL: 'splatvar_shift_v16i32'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat
-; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
   %insert = insertelement <16 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -949,7 +877,7 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
 
 define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = shl <16 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
@@ -1004,7 +932,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 
 define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = shl <32 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
@@ -1059,7 +987,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 
 define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v64i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %shift = shl <64 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-latency.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-latency.ll
index 06b97aeb3c4f9..c53dbc5f586aa 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-shl-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-latency.ll
@@ -427,17 +427,11 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
 ;
 
 define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
-; SSE2-LABEL: 'splatvar_shift_v2i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v2i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+; SSE-LABEL: 'splatvar_shift_v2i64'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v2i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
@@ -476,17 +470,11 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 }
 
 define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
-; SSE2-LABEL: 'splatvar_shift_v4i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v4i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+; SSE-LABEL: 'splatvar_shift_v4i64'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v4i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
@@ -525,17 +513,11 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 }
 
 define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
-; SSE2-LABEL: 'splatvar_shift_v8i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v8i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+; SSE-LABEL: 'splatvar_shift_v8i64'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
@@ -574,17 +556,11 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 }
 
 define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
-; SSE2-LABEL: 'splatvar_shift_v4i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v4i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+; SSE-LABEL: 'splatvar_shift_v4i32'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v4i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
@@ -623,17 +599,11 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
 }
 
 define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
-; SSE2-LABEL: 'splatvar_shift_v8i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v8i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+; SSE-LABEL: 'splatvar_shift_v8i32'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
@@ -672,17 +642,11 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 }
 
 define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
-; SSE2-LABEL: 'splatvar_shift_v16i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v16i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+; SSE-LABEL: 'splatvar_shift_v16i32'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
@@ -875,7 +839,7 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
 
 define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
@@ -924,7 +888,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 
 define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <32 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
@@ -973,7 +937,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 
 define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v64i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %shift = shl <64 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift

diff  --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-sizelatency.ll
index abdf29c77b7f0..d1c8a2fc47080 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-shl-sizelatency.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-sizelatency.ll
@@ -475,41 +475,11 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
 ;
 
 define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
-; SSE2-LABEL: 'splatvar_shift_v2i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v2i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
-;
-; AVX-LABEL: 'splatvar_shift_v2i64'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
-;
-; XOP-LABEL: 'splatvar_shift_v2i64'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
-;
-; AVX512-LABEL: 'splatvar_shift_v2i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
-;
-; SLM-LABEL: 'splatvar_shift_v2i64'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
+; CHECK-LABEL: 'splatvar_shift_v2i64'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
   %insert = insertelement <2 x i64> undef, i64 %b, i32 0
   %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -518,17 +488,11 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
 }
 
 define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
-; SSE2-LABEL: 'splatvar_shift_v4i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v4i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
+; SSE-LABEL: 'splatvar_shift_v4i64'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v4i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
@@ -559,12 +523,6 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
-;
-; SLM-LABEL: 'splatvar_shift_v4i64'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
   %insert = insertelement <4 x i64> undef, i64 %b, i32 0
   %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -573,17 +531,11 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
 }
 
 define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
-; SSE2-LABEL: 'splatvar_shift_v8i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v8i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
+; SSE-LABEL: 'splatvar_shift_v8i64'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
@@ -614,12 +566,6 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
-;
-; SLM-LABEL: 'splatvar_shift_v8i64'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
   %insert = insertelement <8 x i64> undef, i64 %b, i32 0
   %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
@@ -628,41 +574,11 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
 }
 
 define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
-; SSE2-LABEL: 'splatvar_shift_v4i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v4i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
-;
-; AVX-LABEL: 'splatvar_shift_v4i32'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
-;
-; XOP-LABEL: 'splatvar_shift_v4i32'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
-;
-; AVX512-LABEL: 'splatvar_shift_v4i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
-;
-; SLM-LABEL: 'splatvar_shift_v4i32'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
+; CHECK-LABEL: 'splatvar_shift_v4i32'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
   %insert = insertelement <4 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
@@ -671,17 +587,11 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
 }
 
 define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
-; SSE2-LABEL: 'splatvar_shift_v8i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v8i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
+; SSE-LABEL: 'splatvar_shift_v8i32'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v8i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
@@ -712,12 +622,6 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
-;
-; SLM-LABEL: 'splatvar_shift_v8i32'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
   %insert = insertelement <8 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -726,17 +630,11 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
 }
 
 define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
-; SSE2-LABEL: 'splatvar_shift_v16i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
-;
-; SSE42-LABEL: 'splatvar_shift_v16i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
+; SSE-LABEL: 'splatvar_shift_v16i32'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatvar_shift_v16i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
@@ -767,12 +665,6 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i32> %a, %splat
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
-;
-; SLM-LABEL: 'splatvar_shift_v16i32'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
   %insert = insertelement <16 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -941,7 +833,7 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
 
 define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v16i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = shl <16 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
@@ -996,7 +888,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
 
 define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v32i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = shl <32 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
@@ -1051,7 +943,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
 
 define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
 ; SSE2-LABEL: 'splatvar_shift_v64i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %shift = shl <64 x i8> %a, %splat
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/cse.ll b/llvm/test/Transforms/SLPVectorizer/X86/cse.ll
index 16fabea041403..a520ddd679dc8 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/cse.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/cse.ll
@@ -345,9 +345,12 @@ define void @cse_for_hoisted_instructions_in_preheader(ptr %dst, i32 %a, i1 %c)
 ; CHECK-NEXT:    [[TMP2:%.*]] = or <2 x i32> <i32 22, i32 22>, [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i32> [[TMP2]], <i32 3, i32 3>
 ; CHECK-NEXT:    store <2 x i32> [[TMP3]], ptr [[DST:%.*]], align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = or <2 x i32> [[TMP1]], <i32 3, i32 3>
+; CHECK-NEXT:    [[OR_2:%.*]] = or i32 [[A]], 3
 ; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 10
-; CHECK-NEXT:    store <2 x i32> [[TMP4]], ptr [[GEP_2]], align 4
+; CHECK-NEXT:    store i32 [[OR_2]], ptr [[GEP_2]], align 4
+; CHECK-NEXT:    [[OR_3:%.*]] = or i32 [[A]], 3
+; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 11
+; CHECK-NEXT:    store i32 [[OR_3]], ptr [[GEP_3]], align 4
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll b/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll
index 5baa8bca154a8..b9f1a42c56302 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll
@@ -11,11 +11,23 @@ define void @test() #0 {
 ; CHECK-NEXT:    br label [[BB1:%.*]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    [[TMP:%.*]] = phi i32 [ undef, [[BB1]] ], [ undef, [[BB:%.*]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[BB1]] ], [ undef, [[BB]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <16 x i32> poison, i32 [[TMP]], i32 0
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[SHUFFLE]])
-; CHECK-NEXT:    [[OP_RDX]] = mul i32 [[TMP1]], undef
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[TMP18:%.*]], [[BB1]] ], [ undef, [[BB]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = mul i32 undef, [[TMP]]
+; CHECK-NEXT:    [[TMP4:%.*]] = mul i32 [[TMP3]], [[TMP]]
+; CHECK-NEXT:    [[TMP5:%.*]] = mul i32 [[TMP4]], [[TMP]]
+; CHECK-NEXT:    [[TMP6:%.*]] = mul i32 [[TMP5]], [[TMP]]
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i32 [[TMP6]], [[TMP]]
+; CHECK-NEXT:    [[TMP8:%.*]] = mul i32 [[TMP7]], [[TMP]]
+; CHECK-NEXT:    [[TMP9:%.*]] = mul i32 [[TMP8]], [[TMP]]
+; CHECK-NEXT:    [[TMP10:%.*]] = mul i32 [[TMP9]], [[TMP]]
+; CHECK-NEXT:    [[TMP11:%.*]] = mul i32 [[TMP10]], [[TMP]]
+; CHECK-NEXT:    [[TMP12:%.*]] = mul i32 [[TMP11]], [[TMP]]
+; CHECK-NEXT:    [[TMP13:%.*]] = mul i32 [[TMP12]], [[TMP]]
+; CHECK-NEXT:    [[TMP14:%.*]] = mul i32 [[TMP13]], [[TMP]]
+; CHECK-NEXT:    [[TMP15:%.*]] = mul i32 [[TMP14]], [[TMP]]
+; CHECK-NEXT:    [[TMP16:%.*]] = mul i32 [[TMP15]], [[TMP]]
+; CHECK-NEXT:    [[TMP17:%.*]] = mul i32 [[TMP16]], [[TMP]]
+; CHECK-NEXT:    [[TMP18]] = mul i32 [[TMP17]], [[TMP]]
 ; CHECK-NEXT:    br label [[BB1]]
 ;
 bb:
@@ -51,10 +63,10 @@ define void @test_2(ptr addrspace(1) %arg, i32 %arg1) #0 {
 ; CHECK-NEXT:    [[TMP:%.*]] = phi i32 [ undef, [[BB:%.*]] ], [ undef, [[BB2]] ]
 ; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ 0, [[BB]] ], [ undef, [[BB2]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <8 x i32> poison, i32 [[TMP]], i32 0
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[SHUFFLE]])
-; CHECK-NEXT:    [[OP_RDX:%.*]] = add i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP1]])
+; CHECK-NEXT:    [[OP_RDX:%.*]] = add i32 [[TMP2]], [[TMP3]]
 ; CHECK-NEXT:    [[OP_RDX1:%.*]] = add i32 [[OP_RDX]], undef
 ; CHECK-NEXT:    call void @use(i32 [[OP_RDX1]])
 ; CHECK-NEXT:    br label [[BB2]]
@@ -96,66 +108,56 @@ define i64 @test_3() #0 {
 ; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb3:
 ; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i32> [ undef, [[BB1]] ], [ poison, [[BB2:%.*]] ]
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1>
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <16 x i32> poison, i32 [[TMP1]], i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 [[TMP1]], i32 1
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[TMP1]], i32 2
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <16 x i32> [[TMP4]], i32 [[TMP1]], i32 3
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <16 x i32> [[TMP5]], i32 [[TMP1]], i32 4
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[TMP1]], i32 5
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[TMP1]], i32 6
-; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[TMP1]], i32 7
-; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[TMP1]], i32 8
-; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <16 x i32> [[TMP10]], i32 [[TMP1]], i32 9
-; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <16 x i32> [[TMP11]], i32 [[TMP1]], i32 10
-; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <16 x i32> [[TMP12]], i32 [[TMP1]], i32 11
-; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <16 x i32> [[TMP13]], i32 [[TMP1]], i32 12
-; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <16 x i32> [[TMP14]], i32 [[TMP1]], i32 13
-; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <16 x i32> [[TMP15]], i32 [[TMP1]], i32 14
-; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <16 x i32> [[TMP16]], i32 [[TMP1]], i32 15
-; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <32 x i32> poison, i32 [[TMP1]], i32 0
-; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <32 x i32> [[TMP18]], i32 [[TMP1]], i32 1
-; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <32 x i32> [[TMP19]], i32 [[TMP1]], i32 2
-; CHECK-NEXT:    [[TMP21:%.*]] = insertelement <32 x i32> [[TMP20]], i32 [[TMP1]], i32 3
-; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <32 x i32> [[TMP21]], i32 [[TMP1]], i32 4
-; CHECK-NEXT:    [[TMP23:%.*]] = insertelement <32 x i32> [[TMP22]], i32 [[TMP1]], i32 5
-; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <32 x i32> [[TMP23]], i32 [[TMP1]], i32 6
-; CHECK-NEXT:    [[TMP25:%.*]] = insertelement <32 x i32> [[TMP24]], i32 [[TMP1]], i32 7
-; CHECK-NEXT:    [[TMP26:%.*]] = insertelement <32 x i32> [[TMP25]], i32 [[TMP1]], i32 8
-; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <32 x i32> [[TMP26]], i32 [[TMP1]], i32 9
-; CHECK-NEXT:    [[TMP28:%.*]] = insertelement <32 x i32> [[TMP27]], i32 [[TMP1]], i32 10
-; CHECK-NEXT:    [[TMP29:%.*]] = insertelement <32 x i32> [[TMP28]], i32 [[TMP1]], i32 11
-; CHECK-NEXT:    [[TMP30:%.*]] = insertelement <32 x i32> [[TMP29]], i32 [[TMP1]], i32 12
-; CHECK-NEXT:    [[TMP31:%.*]] = insertelement <32 x i32> [[TMP30]], i32 [[TMP1]], i32 13
-; CHECK-NEXT:    [[TMP32:%.*]] = insertelement <32 x i32> [[TMP31]], i32 [[TMP1]], i32 14
-; CHECK-NEXT:    [[TMP33:%.*]] = insertelement <32 x i32> [[TMP32]], i32 [[TMP1]], i32 15
-; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <32 x i32> [[TMP33]], i32 [[TMP1]], i32 16
-; CHECK-NEXT:    [[TMP35:%.*]] = insertelement <32 x i32> [[TMP34]], i32 [[TMP1]], i32 17
-; CHECK-NEXT:    [[TMP36:%.*]] = insertelement <32 x i32> [[TMP35]], i32 [[TMP1]], i32 18
-; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <32 x i32> [[TMP36]], i32 [[TMP1]], i32 19
-; CHECK-NEXT:    [[TMP38:%.*]] = insertelement <32 x i32> [[TMP37]], i32 [[TMP1]], i32 20
-; CHECK-NEXT:    [[TMP39:%.*]] = insertelement <32 x i32> [[TMP38]], i32 [[TMP1]], i32 21
-; CHECK-NEXT:    [[TMP40:%.*]] = insertelement <32 x i32> [[TMP39]], i32 [[TMP1]], i32 22
-; CHECK-NEXT:    [[TMP41:%.*]] = insertelement <32 x i32> [[TMP40]], i32 [[TMP1]], i32 23
-; CHECK-NEXT:    [[TMP42:%.*]] = insertelement <32 x i32> [[TMP41]], i32 [[TMP1]], i32 24
-; CHECK-NEXT:    [[TMP43:%.*]] = insertelement <32 x i32> [[TMP42]], i32 [[TMP1]], i32 25
-; CHECK-NEXT:    [[TMP44:%.*]] = insertelement <32 x i32> [[TMP43]], i32 [[TMP1]], i32 26
-; CHECK-NEXT:    [[TMP45:%.*]] = insertelement <32 x i32> [[TMP44]], i32 [[TMP1]], i32 27
-; CHECK-NEXT:    [[TMP46:%.*]] = insertelement <32 x i32> [[TMP45]], i32 [[TMP1]], i32 28
-; CHECK-NEXT:    [[TMP47:%.*]] = insertelement <32 x i32> [[TMP46]], i32 [[TMP1]], i32 29
-; CHECK-NEXT:    [[TMP48:%.*]] = insertelement <32 x i32> [[TMP47]], i32 [[TMP1]], i32 30
-; CHECK-NEXT:    [[TMP49:%.*]] = insertelement <32 x i32> [[TMP48]], i32 [[TMP1]], i32 31
-; CHECK-NEXT:    [[TMP50:%.*]] = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> [[TMP49]])
-; CHECK-NEXT:    [[TMP51:%.*]] = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[TMP17]])
-; CHECK-NEXT:    [[OP_RDX:%.*]] = mul i32 [[TMP50]], [[TMP51]]
-; CHECK-NEXT:    [[TMP52:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[SHUFFLE]])
-; CHECK-NEXT:    [[OP_RDX1:%.*]] = mul i32 [[OP_RDX]], [[TMP52]]
-; CHECK-NEXT:    [[OP_RDX2:%.*]] = mul i32 [[OP_RDX1]], [[TMP1]]
-; CHECK-NEXT:    [[OP_RDX3:%.*]] = mul i32 [[TMP1]], [[TMP1]]
-; CHECK-NEXT:    [[OP_RDX4:%.*]] = mul i32 [[OP_RDX2]], [[OP_RDX3]]
-; CHECK-NEXT:    [[OP_RDX5:%.*]] = mul i32 [[OP_RDX4]], [[TMP1]]
-; CHECK-NEXT:    [[VAL64:%.*]] = add i32 undef, [[OP_RDX5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <16 x i32> [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <32 x i32> poison, i32 [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <32 x i32> [[TMP3]], i32 [[TMP2]], i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <32 x i32> [[TMP4]], i32 [[TMP2]], i32 2
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <32 x i32> [[TMP5]], i32 [[TMP2]], i32 3
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <32 x i32> [[TMP6]], i32 [[TMP2]], i32 4
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <32 x i32> [[TMP7]], i32 [[TMP2]], i32 5
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <32 x i32> [[TMP8]], i32 [[TMP2]], i32 6
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <32 x i32> [[TMP9]], i32 [[TMP2]], i32 7
+; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <32 x i32> [[TMP10]], i32 [[TMP2]], i32 8
+; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <32 x i32> [[TMP11]], i32 [[TMP2]], i32 9
+; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <32 x i32> [[TMP12]], i32 [[TMP2]], i32 10
+; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <32 x i32> [[TMP13]], i32 [[TMP2]], i32 11
+; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <32 x i32> [[TMP14]], i32 [[TMP2]], i32 12
+; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <32 x i32> [[TMP15]], i32 [[TMP2]], i32 13
+; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <32 x i32> [[TMP16]], i32 [[TMP2]], i32 14
+; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <32 x i32> [[TMP17]], i32 [[TMP2]], i32 15
+; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <32 x i32> [[TMP18]], i32 [[TMP2]], i32 16
+; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <32 x i32> [[TMP19]], i32 [[TMP2]], i32 17
+; CHECK-NEXT:    [[TMP21:%.*]] = insertelement <32 x i32> [[TMP20]], i32 [[TMP2]], i32 18
+; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <32 x i32> [[TMP21]], i32 [[TMP2]], i32 19
+; CHECK-NEXT:    [[TMP23:%.*]] = insertelement <32 x i32> [[TMP22]], i32 [[TMP2]], i32 20
+; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <32 x i32> [[TMP23]], i32 [[TMP2]], i32 21
+; CHECK-NEXT:    [[TMP25:%.*]] = insertelement <32 x i32> [[TMP24]], i32 [[TMP2]], i32 22
+; CHECK-NEXT:    [[TMP26:%.*]] = insertelement <32 x i32> [[TMP25]], i32 [[TMP2]], i32 23
+; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <32 x i32> [[TMP26]], i32 [[TMP2]], i32 24
+; CHECK-NEXT:    [[TMP28:%.*]] = insertelement <32 x i32> [[TMP27]], i32 [[TMP2]], i32 25
+; CHECK-NEXT:    [[TMP29:%.*]] = insertelement <32 x i32> [[TMP28]], i32 [[TMP2]], i32 26
+; CHECK-NEXT:    [[TMP30:%.*]] = insertelement <32 x i32> [[TMP29]], i32 [[TMP2]], i32 27
+; CHECK-NEXT:    [[TMP31:%.*]] = insertelement <32 x i32> [[TMP30]], i32 [[TMP2]], i32 28
+; CHECK-NEXT:    [[TMP32:%.*]] = insertelement <32 x i32> [[TMP31]], i32 [[TMP2]], i32 29
+; CHECK-NEXT:    [[TMP33:%.*]] = insertelement <32 x i32> [[TMP32]], i32 [[TMP2]], i32 30
+; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <32 x i32> [[TMP33]], i32 [[TMP2]], i32 31
+; CHECK-NEXT:    [[TMP35:%.*]] = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> [[TMP34]])
+; CHECK-NEXT:    [[TMP36:%.*]] = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[TMP1]])
+; CHECK-NEXT:    [[OP_RDX:%.*]] = mul i32 [[TMP35]], [[TMP36]]
+; CHECK-NEXT:    [[OP_RDX1:%.*]] = mul i32 [[OP_RDX]], [[TMP2]]
+; CHECK-NEXT:    [[OP_RDX2:%.*]] = mul i32 [[TMP2]], [[TMP2]]
+; CHECK-NEXT:    [[OP_RDX3:%.*]] = mul i32 [[TMP2]], [[TMP2]]
+; CHECK-NEXT:    [[OP_RDX4:%.*]] = mul i32 [[TMP2]], [[TMP2]]
+; CHECK-NEXT:    [[OP_RDX5:%.*]] = mul i32 [[TMP2]], [[TMP2]]
+; CHECK-NEXT:    [[OP_RDX6:%.*]] = mul i32 [[TMP2]], [[TMP2]]
+; CHECK-NEXT:    [[OP_RDX7:%.*]] = mul i32 [[OP_RDX1]], [[OP_RDX2]]
+; CHECK-NEXT:    [[OP_RDX8:%.*]] = mul i32 [[OP_RDX3]], [[OP_RDX4]]
+; CHECK-NEXT:    [[OP_RDX9:%.*]] = mul i32 [[OP_RDX5]], [[OP_RDX6]]
+; CHECK-NEXT:    [[OP_RDX10:%.*]] = mul i32 [[OP_RDX7]], [[OP_RDX8]]
+; CHECK-NEXT:    [[OP_RDX11:%.*]] = mul i32 [[OP_RDX9]], [[TMP2]]
+; CHECK-NEXT:    [[OP_RDX12:%.*]] = mul i32 [[OP_RDX10]], [[OP_RDX11]]
+; CHECK-NEXT:    [[VAL64:%.*]] = add i32 undef, [[OP_RDX12]]
 ; CHECK-NEXT:    [[VAL65:%.*]] = sext i32 [[VAL64]] to i64
 ; CHECK-NEXT:    ret i64 [[VAL65]]
 ;

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_gather-load-redux-cost.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_gather-load-redux-cost.ll
index 5b0ab04c7ba07..3bc6e64606e39 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/remark_gather-load-redux-cost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_gather-load-redux-cost.ll
@@ -22,7 +22,7 @@ define i32 @test(ptr noalias %p, ptr noalias %addr) {
   ; YAML-NEXT: Function:        test
   ; YAML-NEXT: Args:
   ; YAML-NEXT:   - String:          'Vectorized horizontal reduction with cost '
-  ; YAML-NEXT:   - Cost:            '-3'
+  ; YAML-NEXT:   - Cost:            '-1'
   ; YAML-NEXT:   - String:          ' and with tree size '
   ; YAML-NEXT:   - TreeSize:        '7'
 entry:

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll b/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll
index 3ce914d336575..92760b43e273b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll
@@ -21,222 +21,233 @@ define void @n() local_unnamed_addr #0 {
 ; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 3, i64 1), align 4
 ; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 3, i64 2), align 8
 ; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 3, i64 3), align 4
-; CHECK-NEXT:    [[TMP13:%.*]] = load <16 x i32>, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 4, i64 0), align 16
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 4, i64 0), align 16
+; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 4, i64 1), align 4
+; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 4, i64 2), align 8
+; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 4, i64 3), align 4
+; CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 5, i64 0), align 16
+; CHECK-NEXT:    [[TMP18:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 5, i64 1), align 4
+; CHECK-NEXT:    [[TMP19:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 5, i64 2), align 8
+; CHECK-NEXT:    [[TMP20:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 5, i64 3), align 4
+; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 6, i64 0), align 16
+; CHECK-NEXT:    [[TMP22:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 6, i64 1), align 4
+; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 6, i64 2), align 8
+; CHECK-NEXT:    [[TMP24:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 6, i64 3), align 4
+; CHECK-NEXT:    [[TMP25:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 7, i64 0), align 16
+; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 7, i64 1), align 4
+; CHECK-NEXT:    [[TMP27:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 7, i64 2), align 8
+; CHECK-NEXT:    [[TMP28:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 7, i64 3), align 4
 ; CHECK-NEXT:    br label [[FOR_COND:%.*]]
 ; CHECK:       for.cond:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_COND]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[B_0:%.*]] = phi i32 [ [[SPEC_SELECT8_3_7:%.*]], [[FOR_COND]] ], [ undef, [[ENTRY]] ]
-; CHECK-NEXT:    [[TMP14:%.*]] = trunc i64 [[INDVARS_IV]] to i32
-; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[TMP14]], -183
-; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x i32> poison, i32 [[TMP15]], i32 0
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP16]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP17:%.*]] = sub <4 x i32> [[SHUFFLE]], [[TMP0]]
-; CHECK-NEXT:    [[TMP18:%.*]] = icmp slt <4 x i32> [[TMP17]], zeroinitializer
-; CHECK-NEXT:    [[TMP19:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP17]]
-; CHECK-NEXT:    [[TMP20:%.*]] = select <4 x i1> [[TMP18]], <4 x i32> [[TMP19]], <4 x i32> [[TMP17]]
-; CHECK-NEXT:    [[TMP21:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP20]])
-; CHECK-NEXT:    [[OP_RDX:%.*]] = icmp slt i32 [[TMP21]], [[B_0]]
-; CHECK-NEXT:    [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP21]], i32 [[B_0]]
-; CHECK-NEXT:    [[SUB_116:%.*]] = sub i32 [[TMP15]], [[TMP1]]
-; CHECK-NEXT:    [[TMP22:%.*]] = icmp slt i32 [[SUB_116]], 0
+; CHECK-NEXT:    [[TMP29:%.*]] = trunc i64 [[INDVARS_IV]] to i32
+; CHECK-NEXT:    [[TMP30:%.*]] = add i32 [[TMP29]], -183
+; CHECK-NEXT:    [[TMP31:%.*]] = insertelement <4 x i32> poison, i32 [[TMP30]], i32 0
+; CHECK-NEXT:    [[TMP32:%.*]] = shufflevector <4 x i32> [[TMP31]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP33:%.*]] = sub <4 x i32> [[TMP32]], [[TMP0]]
+; CHECK-NEXT:    [[TMP34:%.*]] = icmp slt <4 x i32> [[TMP33]], zeroinitializer
+; CHECK-NEXT:    [[TMP35:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP33]]
+; CHECK-NEXT:    [[TMP36:%.*]] = select <4 x i1> [[TMP34]], <4 x i32> [[TMP35]], <4 x i32> [[TMP33]]
+; CHECK-NEXT:    [[TMP37:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP36]])
+; CHECK-NEXT:    [[OP_RDX:%.*]] = icmp slt i32 [[TMP37]], [[B_0]]
+; CHECK-NEXT:    [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP37]], i32 [[B_0]]
+; CHECK-NEXT:    [[SUB_116:%.*]] = sub i32 [[TMP30]], [[TMP1]]
+; CHECK-NEXT:    [[TMP38:%.*]] = icmp slt i32 [[SUB_116]], 0
 ; CHECK-NEXT:    [[NEG_117:%.*]] = sub nsw i32 0, [[SUB_116]]
-; CHECK-NEXT:    [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[NEG_117]], i32 [[SUB_116]]
-; CHECK-NEXT:    [[CMP12_118:%.*]] = icmp slt i32 [[TMP23]], [[OP_RDX1]]
-; CHECK-NEXT:    [[SPEC_SELECT8_120:%.*]] = select i1 [[CMP12_118]], i32 [[TMP23]], i32 [[OP_RDX1]]
-; CHECK-NEXT:    [[SUB_1_1:%.*]] = sub i32 [[TMP15]], [[TMP2]]
-; CHECK-NEXT:    [[TMP24:%.*]] = icmp slt i32 [[SUB_1_1]], 0
+; CHECK-NEXT:    [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[NEG_117]], i32 [[SUB_116]]
+; CHECK-NEXT:    [[CMP12_118:%.*]] = icmp slt i32 [[TMP39]], [[OP_RDX1]]
+; CHECK-NEXT:    [[SPEC_SELECT8_120:%.*]] = select i1 [[CMP12_118]], i32 [[TMP39]], i32 [[OP_RDX1]]
+; CHECK-NEXT:    [[SUB_1_1:%.*]] = sub i32 [[TMP30]], [[TMP2]]
+; CHECK-NEXT:    [[TMP40:%.*]] = icmp slt i32 [[SUB_1_1]], 0
 ; CHECK-NEXT:    [[NEG_1_1:%.*]] = sub nsw i32 0, [[SUB_1_1]]
-; CHECK-NEXT:    [[TMP25:%.*]] = select i1 [[TMP24]], i32 [[NEG_1_1]], i32 [[SUB_1_1]]
-; CHECK-NEXT:    [[CMP12_1_1:%.*]] = icmp slt i32 [[TMP25]], [[SPEC_SELECT8_120]]
+; CHECK-NEXT:    [[TMP41:%.*]] = select i1 [[TMP40]], i32 [[NEG_1_1]], i32 [[SUB_1_1]]
+; CHECK-NEXT:    [[CMP12_1_1:%.*]] = icmp slt i32 [[TMP41]], [[SPEC_SELECT8_120]]
 ; CHECK-NEXT:    [[NARROW:%.*]] = or i1 [[CMP12_1_1]], [[CMP12_118]]
-; CHECK-NEXT:    [[SPEC_SELECT8_1_1:%.*]] = select i1 [[CMP12_1_1]], i32 [[TMP25]], i32 [[SPEC_SELECT8_120]]
-; CHECK-NEXT:    [[SUB_2_1:%.*]] = sub i32 [[TMP15]], [[TMP3]]
-; CHECK-NEXT:    [[TMP26:%.*]] = icmp slt i32 [[SUB_2_1]], 0
+; CHECK-NEXT:    [[SPEC_SELECT8_1_1:%.*]] = select i1 [[CMP12_1_1]], i32 [[TMP41]], i32 [[SPEC_SELECT8_120]]
+; CHECK-NEXT:    [[SUB_2_1:%.*]] = sub i32 [[TMP30]], [[TMP3]]
+; CHECK-NEXT:    [[TMP42:%.*]] = icmp slt i32 [[SUB_2_1]], 0
 ; CHECK-NEXT:    [[NEG_2_1:%.*]] = sub nsw i32 0, [[SUB_2_1]]
-; CHECK-NEXT:    [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[NEG_2_1]], i32 [[SUB_2_1]]
-; CHECK-NEXT:    [[CMP12_2_1:%.*]] = icmp slt i32 [[TMP27]], [[SPEC_SELECT8_1_1]]
+; CHECK-NEXT:    [[TMP43:%.*]] = select i1 [[TMP42]], i32 [[NEG_2_1]], i32 [[SUB_2_1]]
+; CHECK-NEXT:    [[CMP12_2_1:%.*]] = icmp slt i32 [[TMP43]], [[SPEC_SELECT8_1_1]]
 ; CHECK-NEXT:    [[NARROW34:%.*]] = or i1 [[CMP12_2_1]], [[NARROW]]
-; CHECK-NEXT:    [[SPEC_SELECT8_2_1:%.*]] = select i1 [[CMP12_2_1]], i32 [[TMP27]], i32 [[SPEC_SELECT8_1_1]]
-; CHECK-NEXT:    [[SUB_3_1:%.*]] = sub i32 [[TMP15]], [[TMP4]]
-; CHECK-NEXT:    [[TMP28:%.*]] = icmp slt i32 [[SUB_3_1]], 0
+; CHECK-NEXT:    [[SPEC_SELECT8_2_1:%.*]] = select i1 [[CMP12_2_1]], i32 [[TMP43]], i32 [[SPEC_SELECT8_1_1]]
+; CHECK-NEXT:    [[SUB_3_1:%.*]] = sub i32 [[TMP30]], [[TMP4]]
+; CHECK-NEXT:    [[TMP44:%.*]] = icmp slt i32 [[SUB_3_1]], 0
 ; CHECK-NEXT:    [[NEG_3_1:%.*]] = sub nsw i32 0, [[SUB_3_1]]
-; CHECK-NEXT:    [[TMP29:%.*]] = select i1 [[TMP28]], i32 [[NEG_3_1]], i32 [[SUB_3_1]]
-; CHECK-NEXT:    [[CMP12_3_1:%.*]] = icmp slt i32 [[TMP29]], [[SPEC_SELECT8_2_1]]
+; CHECK-NEXT:    [[TMP45:%.*]] = select i1 [[TMP44]], i32 [[NEG_3_1]], i32 [[SUB_3_1]]
+; CHECK-NEXT:    [[CMP12_3_1:%.*]] = icmp slt i32 [[TMP45]], [[SPEC_SELECT8_2_1]]
 ; CHECK-NEXT:    [[NARROW35:%.*]] = or i1 [[CMP12_3_1]], [[NARROW34]]
 ; CHECK-NEXT:    [[SPEC_SELECT_3_1:%.*]] = zext i1 [[NARROW35]] to i32
-; CHECK-NEXT:    [[SPEC_SELECT8_3_1:%.*]] = select i1 [[CMP12_3_1]], i32 [[TMP29]], i32 [[SPEC_SELECT8_2_1]]
-; CHECK-NEXT:    [[SUB_222:%.*]] = sub i32 [[TMP15]], [[TMP5]]
-; CHECK-NEXT:    [[TMP30:%.*]] = icmp slt i32 [[SUB_222]], 0
+; CHECK-NEXT:    [[SPEC_SELECT8_3_1:%.*]] = select i1 [[CMP12_3_1]], i32 [[TMP45]], i32 [[SPEC_SELECT8_2_1]]
+; CHECK-NEXT:    [[SUB_222:%.*]] = sub i32 [[TMP30]], [[TMP5]]
+; CHECK-NEXT:    [[TMP46:%.*]] = icmp slt i32 [[SUB_222]], 0
 ; CHECK-NEXT:    [[NEG_223:%.*]] = sub nsw i32 0, [[SUB_222]]
-; CHECK-NEXT:    [[TMP31:%.*]] = select i1 [[TMP30]], i32 [[NEG_223]], i32 [[SUB_222]]
-; CHECK-NEXT:    [[CMP12_224:%.*]] = icmp slt i32 [[TMP31]], [[SPEC_SELECT8_3_1]]
-; CHECK-NEXT:    [[SPEC_SELECT8_226:%.*]] = select i1 [[CMP12_224]], i32 [[TMP31]], i32 [[SPEC_SELECT8_3_1]]
-; CHECK-NEXT:    [[SUB_1_2:%.*]] = sub i32 [[TMP15]], [[TMP6]]
-; CHECK-NEXT:    [[TMP32:%.*]] = icmp slt i32 [[SUB_1_2]], 0
+; CHECK-NEXT:    [[TMP47:%.*]] = select i1 [[TMP46]], i32 [[NEG_223]], i32 [[SUB_222]]
+; CHECK-NEXT:    [[CMP12_224:%.*]] = icmp slt i32 [[TMP47]], [[SPEC_SELECT8_3_1]]
+; CHECK-NEXT:    [[SPEC_SELECT8_226:%.*]] = select i1 [[CMP12_224]], i32 [[TMP47]], i32 [[SPEC_SELECT8_3_1]]
+; CHECK-NEXT:    [[SUB_1_2:%.*]] = sub i32 [[TMP30]], [[TMP6]]
+; CHECK-NEXT:    [[TMP48:%.*]] = icmp slt i32 [[SUB_1_2]], 0
 ; CHECK-NEXT:    [[NEG_1_2:%.*]] = sub nsw i32 0, [[SUB_1_2]]
-; CHECK-NEXT:    [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[NEG_1_2]], i32 [[SUB_1_2]]
-; CHECK-NEXT:    [[CMP12_1_2:%.*]] = icmp slt i32 [[TMP33]], [[SPEC_SELECT8_226]]
-; CHECK-NEXT:    [[TMP34:%.*]] = or i1 [[CMP12_1_2]], [[CMP12_224]]
-; CHECK-NEXT:    [[SPEC_SELECT8_1_2:%.*]] = select i1 [[CMP12_1_2]], i32 [[TMP33]], i32 [[SPEC_SELECT8_226]]
-; CHECK-NEXT:    [[SUB_2_2:%.*]] = sub i32 [[TMP15]], [[TMP7]]
-; CHECK-NEXT:    [[TMP35:%.*]] = icmp slt i32 [[SUB_2_2]], 0
+; CHECK-NEXT:    [[TMP49:%.*]] = select i1 [[TMP48]], i32 [[NEG_1_2]], i32 [[SUB_1_2]]
+; CHECK-NEXT:    [[CMP12_1_2:%.*]] = icmp slt i32 [[TMP49]], [[SPEC_SELECT8_226]]
+; CHECK-NEXT:    [[TMP50:%.*]] = or i1 [[CMP12_1_2]], [[CMP12_224]]
+; CHECK-NEXT:    [[SPEC_SELECT8_1_2:%.*]] = select i1 [[CMP12_1_2]], i32 [[TMP49]], i32 [[SPEC_SELECT8_226]]
+; CHECK-NEXT:    [[SUB_2_2:%.*]] = sub i32 [[TMP30]], [[TMP7]]
+; CHECK-NEXT:    [[TMP51:%.*]] = icmp slt i32 [[SUB_2_2]], 0
 ; CHECK-NEXT:    [[NEG_2_2:%.*]] = sub nsw i32 0, [[SUB_2_2]]
-; CHECK-NEXT:    [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[NEG_2_2]], i32 [[SUB_2_2]]
-; CHECK-NEXT:    [[CMP12_2_2:%.*]] = icmp slt i32 [[TMP36]], [[SPEC_SELECT8_1_2]]
-; CHECK-NEXT:    [[TMP37:%.*]] = or i1 [[CMP12_2_2]], [[TMP34]]
-; CHECK-NEXT:    [[SPEC_SELECT8_2_2:%.*]] = select i1 [[CMP12_2_2]], i32 [[TMP36]], i32 [[SPEC_SELECT8_1_2]]
-; CHECK-NEXT:    [[SUB_3_2:%.*]] = sub i32 [[TMP15]], [[TMP8]]
-; CHECK-NEXT:    [[TMP38:%.*]] = icmp slt i32 [[SUB_3_2]], 0
+; CHECK-NEXT:    [[TMP52:%.*]] = select i1 [[TMP51]], i32 [[NEG_2_2]], i32 [[SUB_2_2]]
+; CHECK-NEXT:    [[CMP12_2_2:%.*]] = icmp slt i32 [[TMP52]], [[SPEC_SELECT8_1_2]]
+; CHECK-NEXT:    [[TMP53:%.*]] = or i1 [[CMP12_2_2]], [[TMP50]]
+; CHECK-NEXT:    [[SPEC_SELECT8_2_2:%.*]] = select i1 [[CMP12_2_2]], i32 [[TMP52]], i32 [[SPEC_SELECT8_1_2]]
+; CHECK-NEXT:    [[SUB_3_2:%.*]] = sub i32 [[TMP30]], [[TMP8]]
+; CHECK-NEXT:    [[TMP54:%.*]] = icmp slt i32 [[SUB_3_2]], 0
 ; CHECK-NEXT:    [[NEG_3_2:%.*]] = sub nsw i32 0, [[SUB_3_2]]
-; CHECK-NEXT:    [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[NEG_3_2]], i32 [[SUB_3_2]]
-; CHECK-NEXT:    [[CMP12_3_2:%.*]] = icmp slt i32 [[TMP39]], [[SPEC_SELECT8_2_2]]
-; CHECK-NEXT:    [[TMP40:%.*]] = or i1 [[CMP12_3_2]], [[TMP37]]
-; CHECK-NEXT:    [[SPEC_SELECT_3_2:%.*]] = select i1 [[TMP40]], i32 2, i32 [[SPEC_SELECT_3_1]]
-; CHECK-NEXT:    [[SPEC_SELECT8_3_2:%.*]] = select i1 [[CMP12_3_2]], i32 [[TMP39]], i32 [[SPEC_SELECT8_2_2]]
-; CHECK-NEXT:    [[SUB_328:%.*]] = sub i32 [[TMP15]], [[TMP9]]
-; CHECK-NEXT:    [[TMP41:%.*]] = icmp slt i32 [[SUB_328]], 0
+; CHECK-NEXT:    [[TMP55:%.*]] = select i1 [[TMP54]], i32 [[NEG_3_2]], i32 [[SUB_3_2]]
+; CHECK-NEXT:    [[CMP12_3_2:%.*]] = icmp slt i32 [[TMP55]], [[SPEC_SELECT8_2_2]]
+; CHECK-NEXT:    [[TMP56:%.*]] = or i1 [[CMP12_3_2]], [[TMP53]]
+; CHECK-NEXT:    [[SPEC_SELECT_3_2:%.*]] = select i1 [[TMP56]], i32 2, i32 [[SPEC_SELECT_3_1]]
+; CHECK-NEXT:    [[SPEC_SELECT8_3_2:%.*]] = select i1 [[CMP12_3_2]], i32 [[TMP55]], i32 [[SPEC_SELECT8_2_2]]
+; CHECK-NEXT:    [[SUB_328:%.*]] = sub i32 [[TMP30]], [[TMP9]]
+; CHECK-NEXT:    [[TMP57:%.*]] = icmp slt i32 [[SUB_328]], 0
 ; CHECK-NEXT:    [[NEG_329:%.*]] = sub nsw i32 0, [[SUB_328]]
-; CHECK-NEXT:    [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[NEG_329]], i32 [[SUB_328]]
-; CHECK-NEXT:    [[CMP12_330:%.*]] = icmp slt i32 [[TMP42]], [[SPEC_SELECT8_3_2]]
-; CHECK-NEXT:    [[SPEC_SELECT8_332:%.*]] = select i1 [[CMP12_330]], i32 [[TMP42]], i32 [[SPEC_SELECT8_3_2]]
-; CHECK-NEXT:    [[SUB_1_3:%.*]] = sub i32 [[TMP15]], [[TMP10]]
-; CHECK-NEXT:    [[TMP43:%.*]] = icmp slt i32 [[SUB_1_3]], 0
+; CHECK-NEXT:    [[TMP58:%.*]] = select i1 [[TMP57]], i32 [[NEG_329]], i32 [[SUB_328]]
+; CHECK-NEXT:    [[CMP12_330:%.*]] = icmp slt i32 [[TMP58]], [[SPEC_SELECT8_3_2]]
+; CHECK-NEXT:    [[SPEC_SELECT8_332:%.*]] = select i1 [[CMP12_330]], i32 [[TMP58]], i32 [[SPEC_SELECT8_3_2]]
+; CHECK-NEXT:    [[SUB_1_3:%.*]] = sub i32 [[TMP30]], [[TMP10]]
+; CHECK-NEXT:    [[TMP59:%.*]] = icmp slt i32 [[SUB_1_3]], 0
 ; CHECK-NEXT:    [[NEG_1_3:%.*]] = sub nsw i32 0, [[SUB_1_3]]
-; CHECK-NEXT:    [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[NEG_1_3]], i32 [[SUB_1_3]]
-; CHECK-NEXT:    [[CMP12_1_3:%.*]] = icmp slt i32 [[TMP44]], [[SPEC_SELECT8_332]]
-; CHECK-NEXT:    [[TMP45:%.*]] = or i1 [[CMP12_1_3]], [[CMP12_330]]
-; CHECK-NEXT:    [[SPEC_SELECT8_1_3:%.*]] = select i1 [[CMP12_1_3]], i32 [[TMP44]], i32 [[SPEC_SELECT8_332]]
-; CHECK-NEXT:    [[SUB_2_3:%.*]] = sub i32 [[TMP15]], [[TMP11]]
-; CHECK-NEXT:    [[TMP46:%.*]] = icmp slt i32 [[SUB_2_3]], 0
+; CHECK-NEXT:    [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[NEG_1_3]], i32 [[SUB_1_3]]
+; CHECK-NEXT:    [[CMP12_1_3:%.*]] = icmp slt i32 [[TMP60]], [[SPEC_SELECT8_332]]
+; CHECK-NEXT:    [[TMP61:%.*]] = or i1 [[CMP12_1_3]], [[CMP12_330]]
+; CHECK-NEXT:    [[SPEC_SELECT8_1_3:%.*]] = select i1 [[CMP12_1_3]], i32 [[TMP60]], i32 [[SPEC_SELECT8_332]]
+; CHECK-NEXT:    [[SUB_2_3:%.*]] = sub i32 [[TMP30]], [[TMP11]]
+; CHECK-NEXT:    [[TMP62:%.*]] = icmp slt i32 [[SUB_2_3]], 0
 ; CHECK-NEXT:    [[NEG_2_3:%.*]] = sub nsw i32 0, [[SUB_2_3]]
-; CHECK-NEXT:    [[TMP47:%.*]] = select i1 [[TMP46]], i32 [[NEG_2_3]], i32 [[SUB_2_3]]
-; CHECK-NEXT:    [[CMP12_2_3:%.*]] = icmp slt i32 [[TMP47]], [[SPEC_SELECT8_1_3]]
-; CHECK-NEXT:    [[TMP48:%.*]] = or i1 [[CMP12_2_3]], [[TMP45]]
-; CHECK-NEXT:    [[SPEC_SELECT8_2_3:%.*]] = select i1 [[CMP12_2_3]], i32 [[TMP47]], i32 [[SPEC_SELECT8_1_3]]
-; CHECK-NEXT:    [[SUB_3_3:%.*]] = sub i32 [[TMP15]], [[TMP12]]
-; CHECK-NEXT:    [[TMP49:%.*]] = icmp slt i32 [[SUB_3_3]], 0
+; CHECK-NEXT:    [[TMP63:%.*]] = select i1 [[TMP62]], i32 [[NEG_2_3]], i32 [[SUB_2_3]]
+; CHECK-NEXT:    [[CMP12_2_3:%.*]] = icmp slt i32 [[TMP63]], [[SPEC_SELECT8_1_3]]
+; CHECK-NEXT:    [[TMP64:%.*]] = or i1 [[CMP12_2_3]], [[TMP61]]
+; CHECK-NEXT:    [[SPEC_SELECT8_2_3:%.*]] = select i1 [[CMP12_2_3]], i32 [[TMP63]], i32 [[SPEC_SELECT8_1_3]]
+; CHECK-NEXT:    [[SUB_3_3:%.*]] = sub i32 [[TMP30]], [[TMP12]]
+; CHECK-NEXT:    [[TMP65:%.*]] = icmp slt i32 [[SUB_3_3]], 0
 ; CHECK-NEXT:    [[NEG_3_3:%.*]] = sub nsw i32 0, [[SUB_3_3]]
-; CHECK-NEXT:    [[TMP50:%.*]] = select i1 [[TMP49]], i32 [[NEG_3_3]], i32 [[SUB_3_3]]
-; CHECK-NEXT:    [[CMP12_3_3:%.*]] = icmp slt i32 [[TMP50]], [[SPEC_SELECT8_2_3]]
-; CHECK-NEXT:    [[TMP51:%.*]] = or i1 [[CMP12_3_3]], [[TMP48]]
-; CHECK-NEXT:    [[SPEC_SELECT_3_3:%.*]] = select i1 [[TMP51]], i32 3, i32 [[SPEC_SELECT_3_2]]
-; CHECK-NEXT:    [[SPEC_SELECT8_3_3:%.*]] = select i1 [[CMP12_3_3]], i32 [[TMP50]], i32 [[SPEC_SELECT8_2_3]]
-; CHECK-NEXT:    [[TMP52:%.*]] = insertelement <16 x i32> poison, i32 [[TMP15]], i32 0
-; CHECK-NEXT:    [[SHUFFLE2:%.*]] = shufflevector <16 x i32> [[TMP52]], <16 x i32> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP53:%.*]] = sub <16 x i32> [[SHUFFLE2]], [[TMP13]]
-; CHECK-NEXT:    [[TMP54:%.*]] = extractelement <16 x i32> [[TMP53]], i32 0
-; CHECK-NEXT:    [[NEG_4:%.*]] = sub nsw i32 0, [[TMP54]]
-; CHECK-NEXT:    [[TMP55:%.*]] = icmp slt <16 x i32> [[TMP53]], zeroinitializer
-; CHECK-NEXT:    [[TMP56:%.*]] = extractelement <16 x i1> [[TMP55]], i32 0
-; CHECK-NEXT:    [[TMP57:%.*]] = select i1 [[TMP56]], i32 [[NEG_4]], i32 [[TMP54]]
-; CHECK-NEXT:    [[CMP12_4:%.*]] = icmp slt i32 [[TMP57]], [[SPEC_SELECT8_3_3]]
-; CHECK-NEXT:    [[SPEC_SELECT8_4:%.*]] = select i1 [[CMP12_4]], i32 [[TMP57]], i32 [[SPEC_SELECT8_3_3]]
-; CHECK-NEXT:    [[TMP58:%.*]] = extractelement <16 x i32> [[TMP53]], i32 1
-; CHECK-NEXT:    [[NEG_1_4:%.*]] = sub nsw i32 0, [[TMP58]]
-; CHECK-NEXT:    [[TMP59:%.*]] = extractelement <16 x i1> [[TMP55]], i32 1
-; CHECK-NEXT:    [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[NEG_1_4]], i32 [[TMP58]]
-; CHECK-NEXT:    [[CMP12_1_4:%.*]] = icmp slt i32 [[TMP60]], [[SPEC_SELECT8_4]]
-; CHECK-NEXT:    [[TMP61:%.*]] = or i1 [[CMP12_1_4]], [[CMP12_4]]
-; CHECK-NEXT:    [[SPEC_SELECT8_1_4:%.*]] = select i1 [[CMP12_1_4]], i32 [[TMP60]], i32 [[SPEC_SELECT8_4]]
-; CHECK-NEXT:    [[TMP62:%.*]] = extractelement <16 x i32> [[TMP53]], i32 2
-; CHECK-NEXT:    [[NEG_2_4:%.*]] = sub nsw i32 0, [[TMP62]]
-; CHECK-NEXT:    [[TMP63:%.*]] = extractelement <16 x i1> [[TMP55]], i32 2
-; CHECK-NEXT:    [[TMP64:%.*]] = select i1 [[TMP63]], i32 [[NEG_2_4]], i32 [[TMP62]]
-; CHECK-NEXT:    [[CMP12_2_4:%.*]] = icmp slt i32 [[TMP64]], [[SPEC_SELECT8_1_4]]
-; CHECK-NEXT:    [[TMP65:%.*]] = or i1 [[CMP12_2_4]], [[TMP61]]
-; CHECK-NEXT:    [[SPEC_SELECT8_2_4:%.*]] = select i1 [[CMP12_2_4]], i32 [[TMP64]], i32 [[SPEC_SELECT8_1_4]]
-; CHECK-NEXT:    [[TMP66:%.*]] = extractelement <16 x i32> [[TMP53]], i32 3
-; CHECK-NEXT:    [[NEG_3_4:%.*]] = sub nsw i32 0, [[TMP66]]
-; CHECK-NEXT:    [[TMP67:%.*]] = extractelement <16 x i1> [[TMP55]], i32 3
-; CHECK-NEXT:    [[TMP68:%.*]] = select i1 [[TMP67]], i32 [[NEG_3_4]], i32 [[TMP66]]
-; CHECK-NEXT:    [[CMP12_3_4:%.*]] = icmp slt i32 [[TMP68]], [[SPEC_SELECT8_2_4]]
-; CHECK-NEXT:    [[TMP69:%.*]] = or i1 [[CMP12_3_4]], [[TMP65]]
-; CHECK-NEXT:    [[SPEC_SELECT_3_4:%.*]] = select i1 [[TMP69]], i32 4, i32 [[SPEC_SELECT_3_3]]
-; CHECK-NEXT:    [[SPEC_SELECT8_3_4:%.*]] = select i1 [[CMP12_3_4]], i32 [[TMP68]], i32 [[SPEC_SELECT8_2_4]]
-; CHECK-NEXT:    [[TMP70:%.*]] = extractelement <16 x i32> [[TMP53]], i32 4
-; CHECK-NEXT:    [[NEG_5:%.*]] = sub nsw i32 0, [[TMP70]]
-; CHECK-NEXT:    [[TMP71:%.*]] = extractelement <16 x i1> [[TMP55]], i32 4
-; CHECK-NEXT:    [[TMP72:%.*]] = select i1 [[TMP71]], i32 [[NEG_5]], i32 [[TMP70]]
-; CHECK-NEXT:    [[CMP12_5:%.*]] = icmp slt i32 [[TMP72]], [[SPEC_SELECT8_3_4]]
-; CHECK-NEXT:    [[SPEC_SELECT8_5:%.*]] = select i1 [[CMP12_5]], i32 [[TMP72]], i32 [[SPEC_SELECT8_3_4]]
-; CHECK-NEXT:    [[TMP73:%.*]] = extractelement <16 x i32> [[TMP53]], i32 5
-; CHECK-NEXT:    [[NEG_1_5:%.*]] = sub nsw i32 0, [[TMP73]]
-; CHECK-NEXT:    [[TMP74:%.*]] = extractelement <16 x i1> [[TMP55]], i32 5
-; CHECK-NEXT:    [[TMP75:%.*]] = select i1 [[TMP74]], i32 [[NEG_1_5]], i32 [[TMP73]]
-; CHECK-NEXT:    [[CMP12_1_5:%.*]] = icmp slt i32 [[TMP75]], [[SPEC_SELECT8_5]]
-; CHECK-NEXT:    [[TMP76:%.*]] = or i1 [[CMP12_1_5]], [[CMP12_5]]
-; CHECK-NEXT:    [[SPEC_SELECT8_1_5:%.*]] = select i1 [[CMP12_1_5]], i32 [[TMP75]], i32 [[SPEC_SELECT8_5]]
-; CHECK-NEXT:    [[TMP77:%.*]] = extractelement <16 x i32> [[TMP53]], i32 6
-; CHECK-NEXT:    [[NEG_2_5:%.*]] = sub nsw i32 0, [[TMP77]]
-; CHECK-NEXT:    [[TMP78:%.*]] = extractelement <16 x i1> [[TMP55]], i32 6
-; CHECK-NEXT:    [[TMP79:%.*]] = select i1 [[TMP78]], i32 [[NEG_2_5]], i32 [[TMP77]]
-; CHECK-NEXT:    [[CMP12_2_5:%.*]] = icmp slt i32 [[TMP79]], [[SPEC_SELECT8_1_5]]
-; CHECK-NEXT:    [[TMP80:%.*]] = or i1 [[CMP12_2_5]], [[TMP76]]
-; CHECK-NEXT:    [[SPEC_SELECT8_2_5:%.*]] = select i1 [[CMP12_2_5]], i32 [[TMP79]], i32 [[SPEC_SELECT8_1_5]]
-; CHECK-NEXT:    [[TMP81:%.*]] = extractelement <16 x i32> [[TMP53]], i32 7
-; CHECK-NEXT:    [[NEG_3_5:%.*]] = sub nsw i32 0, [[TMP81]]
-; CHECK-NEXT:    [[TMP82:%.*]] = extractelement <16 x i1> [[TMP55]], i32 7
-; CHECK-NEXT:    [[TMP83:%.*]] = select i1 [[TMP82]], i32 [[NEG_3_5]], i32 [[TMP81]]
-; CHECK-NEXT:    [[CMP12_3_5:%.*]] = icmp slt i32 [[TMP83]], [[SPEC_SELECT8_2_5]]
-; CHECK-NEXT:    [[TMP84:%.*]] = or i1 [[CMP12_3_5]], [[TMP80]]
-; CHECK-NEXT:    [[SPEC_SELECT_3_5:%.*]] = select i1 [[TMP84]], i32 5, i32 [[SPEC_SELECT_3_4]]
-; CHECK-NEXT:    [[SPEC_SELECT8_3_5:%.*]] = select i1 [[CMP12_3_5]], i32 [[TMP83]], i32 [[SPEC_SELECT8_2_5]]
-; CHECK-NEXT:    [[TMP85:%.*]] = extractelement <16 x i32> [[TMP53]], i32 8
-; CHECK-NEXT:    [[NEG_6:%.*]] = sub nsw i32 0, [[TMP85]]
-; CHECK-NEXT:    [[TMP86:%.*]] = extractelement <16 x i1> [[TMP55]], i32 8
-; CHECK-NEXT:    [[TMP87:%.*]] = select i1 [[TMP86]], i32 [[NEG_6]], i32 [[TMP85]]
-; CHECK-NEXT:    [[CMP12_6:%.*]] = icmp slt i32 [[TMP87]], [[SPEC_SELECT8_3_5]]
-; CHECK-NEXT:    [[SPEC_SELECT8_6:%.*]] = select i1 [[CMP12_6]], i32 [[TMP87]], i32 [[SPEC_SELECT8_3_5]]
-; CHECK-NEXT:    [[TMP88:%.*]] = extractelement <16 x i32> [[TMP53]], i32 9
-; CHECK-NEXT:    [[NEG_1_6:%.*]] = sub nsw i32 0, [[TMP88]]
-; CHECK-NEXT:    [[TMP89:%.*]] = extractelement <16 x i1> [[TMP55]], i32 9
-; CHECK-NEXT:    [[TMP90:%.*]] = select i1 [[TMP89]], i32 [[NEG_1_6]], i32 [[TMP88]]
-; CHECK-NEXT:    [[CMP12_1_6:%.*]] = icmp slt i32 [[TMP90]], [[SPEC_SELECT8_6]]
-; CHECK-NEXT:    [[TMP91:%.*]] = or i1 [[CMP12_1_6]], [[CMP12_6]]
-; CHECK-NEXT:    [[SPEC_SELECT8_1_6:%.*]] = select i1 [[CMP12_1_6]], i32 [[TMP90]], i32 [[SPEC_SELECT8_6]]
-; CHECK-NEXT:    [[TMP92:%.*]] = extractelement <16 x i32> [[TMP53]], i32 10
-; CHECK-NEXT:    [[NEG_2_6:%.*]] = sub nsw i32 0, [[TMP92]]
-; CHECK-NEXT:    [[TMP93:%.*]] = extractelement <16 x i1> [[TMP55]], i32 10
-; CHECK-NEXT:    [[TMP94:%.*]] = select i1 [[TMP93]], i32 [[NEG_2_6]], i32 [[TMP92]]
-; CHECK-NEXT:    [[CMP12_2_6:%.*]] = icmp slt i32 [[TMP94]], [[SPEC_SELECT8_1_6]]
-; CHECK-NEXT:    [[TMP95:%.*]] = or i1 [[CMP12_2_6]], [[TMP91]]
-; CHECK-NEXT:    [[SPEC_SELECT8_2_6:%.*]] = select i1 [[CMP12_2_6]], i32 [[TMP94]], i32 [[SPEC_SELECT8_1_6]]
-; CHECK-NEXT:    [[TMP96:%.*]] = extractelement <16 x i32> [[TMP53]], i32 11
-; CHECK-NEXT:    [[NEG_3_6:%.*]] = sub nsw i32 0, [[TMP96]]
-; CHECK-NEXT:    [[TMP97:%.*]] = extractelement <16 x i1> [[TMP55]], i32 11
-; CHECK-NEXT:    [[TMP98:%.*]] = select i1 [[TMP97]], i32 [[NEG_3_6]], i32 [[TMP96]]
-; CHECK-NEXT:    [[CMP12_3_6:%.*]] = icmp slt i32 [[TMP98]], [[SPEC_SELECT8_2_6]]
-; CHECK-NEXT:    [[TMP99:%.*]] = or i1 [[CMP12_3_6]], [[TMP95]]
-; CHECK-NEXT:    [[SPEC_SELECT_3_6:%.*]] = select i1 [[TMP99]], i32 6, i32 [[SPEC_SELECT_3_5]]
-; CHECK-NEXT:    [[SPEC_SELECT8_3_6:%.*]] = select i1 [[CMP12_3_6]], i32 [[TMP98]], i32 [[SPEC_SELECT8_2_6]]
-; CHECK-NEXT:    [[TMP100:%.*]] = extractelement <16 x i32> [[TMP53]], i32 12
-; CHECK-NEXT:    [[NEG_7:%.*]] = sub nsw i32 0, [[TMP100]]
-; CHECK-NEXT:    [[TMP101:%.*]] = extractelement <16 x i1> [[TMP55]], i32 12
-; CHECK-NEXT:    [[TMP102:%.*]] = select i1 [[TMP101]], i32 [[NEG_7]], i32 [[TMP100]]
+; CHECK-NEXT:    [[TMP66:%.*]] = select i1 [[TMP65]], i32 [[NEG_3_3]], i32 [[SUB_3_3]]
+; CHECK-NEXT:    [[CMP12_3_3:%.*]] = icmp slt i32 [[TMP66]], [[SPEC_SELECT8_2_3]]
+; CHECK-NEXT:    [[TMP67:%.*]] = or i1 [[CMP12_3_3]], [[TMP64]]
+; CHECK-NEXT:    [[SPEC_SELECT_3_3:%.*]] = select i1 [[TMP67]], i32 3, i32 [[SPEC_SELECT_3_2]]
+; CHECK-NEXT:    [[SPEC_SELECT8_3_3:%.*]] = select i1 [[CMP12_3_3]], i32 [[TMP66]], i32 [[SPEC_SELECT8_2_3]]
+; CHECK-NEXT:    [[SUB_4:%.*]] = sub i32 [[TMP30]], [[TMP13]]
+; CHECK-NEXT:    [[TMP68:%.*]] = icmp slt i32 [[SUB_4]], 0
+; CHECK-NEXT:    [[NEG_4:%.*]] = sub nsw i32 0, [[SUB_4]]
+; CHECK-NEXT:    [[TMP69:%.*]] = select i1 [[TMP68]], i32 [[NEG_4]], i32 [[SUB_4]]
+; CHECK-NEXT:    [[CMP12_4:%.*]] = icmp slt i32 [[TMP69]], [[SPEC_SELECT8_3_3]]
+; CHECK-NEXT:    [[SPEC_SELECT8_4:%.*]] = select i1 [[CMP12_4]], i32 [[TMP69]], i32 [[SPEC_SELECT8_3_3]]
+; CHECK-NEXT:    [[SUB_1_4:%.*]] = sub i32 [[TMP30]], [[TMP14]]
+; CHECK-NEXT:    [[TMP70:%.*]] = icmp slt i32 [[SUB_1_4]], 0
+; CHECK-NEXT:    [[NEG_1_4:%.*]] = sub nsw i32 0, [[SUB_1_4]]
+; CHECK-NEXT:    [[TMP71:%.*]] = select i1 [[TMP70]], i32 [[NEG_1_4]], i32 [[SUB_1_4]]
+; CHECK-NEXT:    [[CMP12_1_4:%.*]] = icmp slt i32 [[TMP71]], [[SPEC_SELECT8_4]]
+; CHECK-NEXT:    [[TMP72:%.*]] = or i1 [[CMP12_1_4]], [[CMP12_4]]
+; CHECK-NEXT:    [[SPEC_SELECT8_1_4:%.*]] = select i1 [[CMP12_1_4]], i32 [[TMP71]], i32 [[SPEC_SELECT8_4]]
+; CHECK-NEXT:    [[SUB_2_4:%.*]] = sub i32 [[TMP30]], [[TMP15]]
+; CHECK-NEXT:    [[TMP73:%.*]] = icmp slt i32 [[SUB_2_4]], 0
+; CHECK-NEXT:    [[NEG_2_4:%.*]] = sub nsw i32 0, [[SUB_2_4]]
+; CHECK-NEXT:    [[TMP74:%.*]] = select i1 [[TMP73]], i32 [[NEG_2_4]], i32 [[SUB_2_4]]
+; CHECK-NEXT:    [[CMP12_2_4:%.*]] = icmp slt i32 [[TMP74]], [[SPEC_SELECT8_1_4]]
+; CHECK-NEXT:    [[TMP75:%.*]] = or i1 [[CMP12_2_4]], [[TMP72]]
+; CHECK-NEXT:    [[SPEC_SELECT8_2_4:%.*]] = select i1 [[CMP12_2_4]], i32 [[TMP74]], i32 [[SPEC_SELECT8_1_4]]
+; CHECK-NEXT:    [[SUB_3_4:%.*]] = sub i32 [[TMP30]], [[TMP16]]
+; CHECK-NEXT:    [[TMP76:%.*]] = icmp slt i32 [[SUB_3_4]], 0
+; CHECK-NEXT:    [[NEG_3_4:%.*]] = sub nsw i32 0, [[SUB_3_4]]
+; CHECK-NEXT:    [[TMP77:%.*]] = select i1 [[TMP76]], i32 [[NEG_3_4]], i32 [[SUB_3_4]]
+; CHECK-NEXT:    [[CMP12_3_4:%.*]] = icmp slt i32 [[TMP77]], [[SPEC_SELECT8_2_4]]
+; CHECK-NEXT:    [[TMP78:%.*]] = or i1 [[CMP12_3_4]], [[TMP75]]
+; CHECK-NEXT:    [[SPEC_SELECT_3_4:%.*]] = select i1 [[TMP78]], i32 4, i32 [[SPEC_SELECT_3_3]]
+; CHECK-NEXT:    [[SPEC_SELECT8_3_4:%.*]] = select i1 [[CMP12_3_4]], i32 [[TMP77]], i32 [[SPEC_SELECT8_2_4]]
+; CHECK-NEXT:    [[SUB_5:%.*]] = sub i32 [[TMP30]], [[TMP17]]
+; CHECK-NEXT:    [[TMP79:%.*]] = icmp slt i32 [[SUB_5]], 0
+; CHECK-NEXT:    [[NEG_5:%.*]] = sub nsw i32 0, [[SUB_5]]
+; CHECK-NEXT:    [[TMP80:%.*]] = select i1 [[TMP79]], i32 [[NEG_5]], i32 [[SUB_5]]
+; CHECK-NEXT:    [[CMP12_5:%.*]] = icmp slt i32 [[TMP80]], [[SPEC_SELECT8_3_4]]
+; CHECK-NEXT:    [[SPEC_SELECT8_5:%.*]] = select i1 [[CMP12_5]], i32 [[TMP80]], i32 [[SPEC_SELECT8_3_4]]
+; CHECK-NEXT:    [[SUB_1_5:%.*]] = sub i32 [[TMP30]], [[TMP18]]
+; CHECK-NEXT:    [[TMP81:%.*]] = icmp slt i32 [[SUB_1_5]], 0
+; CHECK-NEXT:    [[NEG_1_5:%.*]] = sub nsw i32 0, [[SUB_1_5]]
+; CHECK-NEXT:    [[TMP82:%.*]] = select i1 [[TMP81]], i32 [[NEG_1_5]], i32 [[SUB_1_5]]
+; CHECK-NEXT:    [[CMP12_1_5:%.*]] = icmp slt i32 [[TMP82]], [[SPEC_SELECT8_5]]
+; CHECK-NEXT:    [[TMP83:%.*]] = or i1 [[CMP12_1_5]], [[CMP12_5]]
+; CHECK-NEXT:    [[SPEC_SELECT8_1_5:%.*]] = select i1 [[CMP12_1_5]], i32 [[TMP82]], i32 [[SPEC_SELECT8_5]]
+; CHECK-NEXT:    [[SUB_2_5:%.*]] = sub i32 [[TMP30]], [[TMP19]]
+; CHECK-NEXT:    [[TMP84:%.*]] = icmp slt i32 [[SUB_2_5]], 0
+; CHECK-NEXT:    [[NEG_2_5:%.*]] = sub nsw i32 0, [[SUB_2_5]]
+; CHECK-NEXT:    [[TMP85:%.*]] = select i1 [[TMP84]], i32 [[NEG_2_5]], i32 [[SUB_2_5]]
+; CHECK-NEXT:    [[CMP12_2_5:%.*]] = icmp slt i32 [[TMP85]], [[SPEC_SELECT8_1_5]]
+; CHECK-NEXT:    [[TMP86:%.*]] = or i1 [[CMP12_2_5]], [[TMP83]]
+; CHECK-NEXT:    [[SPEC_SELECT8_2_5:%.*]] = select i1 [[CMP12_2_5]], i32 [[TMP85]], i32 [[SPEC_SELECT8_1_5]]
+; CHECK-NEXT:    [[SUB_3_5:%.*]] = sub i32 [[TMP30]], [[TMP20]]
+; CHECK-NEXT:    [[TMP87:%.*]] = icmp slt i32 [[SUB_3_5]], 0
+; CHECK-NEXT:    [[NEG_3_5:%.*]] = sub nsw i32 0, [[SUB_3_5]]
+; CHECK-NEXT:    [[TMP88:%.*]] = select i1 [[TMP87]], i32 [[NEG_3_5]], i32 [[SUB_3_5]]
+; CHECK-NEXT:    [[CMP12_3_5:%.*]] = icmp slt i32 [[TMP88]], [[SPEC_SELECT8_2_5]]
+; CHECK-NEXT:    [[TMP89:%.*]] = or i1 [[CMP12_3_5]], [[TMP86]]
+; CHECK-NEXT:    [[SPEC_SELECT_3_5:%.*]] = select i1 [[TMP89]], i32 5, i32 [[SPEC_SELECT_3_4]]
+; CHECK-NEXT:    [[SPEC_SELECT8_3_5:%.*]] = select i1 [[CMP12_3_5]], i32 [[TMP88]], i32 [[SPEC_SELECT8_2_5]]
+; CHECK-NEXT:    [[SUB_6:%.*]] = sub i32 [[TMP30]], [[TMP21]]
+; CHECK-NEXT:    [[TMP90:%.*]] = icmp slt i32 [[SUB_6]], 0
+; CHECK-NEXT:    [[NEG_6:%.*]] = sub nsw i32 0, [[SUB_6]]
+; CHECK-NEXT:    [[TMP91:%.*]] = select i1 [[TMP90]], i32 [[NEG_6]], i32 [[SUB_6]]
+; CHECK-NEXT:    [[CMP12_6:%.*]] = icmp slt i32 [[TMP91]], [[SPEC_SELECT8_3_5]]
+; CHECK-NEXT:    [[SPEC_SELECT8_6:%.*]] = select i1 [[CMP12_6]], i32 [[TMP91]], i32 [[SPEC_SELECT8_3_5]]
+; CHECK-NEXT:    [[SUB_1_6:%.*]] = sub i32 [[TMP30]], [[TMP22]]
+; CHECK-NEXT:    [[TMP92:%.*]] = icmp slt i32 [[SUB_1_6]], 0
+; CHECK-NEXT:    [[NEG_1_6:%.*]] = sub nsw i32 0, [[SUB_1_6]]
+; CHECK-NEXT:    [[TMP93:%.*]] = select i1 [[TMP92]], i32 [[NEG_1_6]], i32 [[SUB_1_6]]
+; CHECK-NEXT:    [[CMP12_1_6:%.*]] = icmp slt i32 [[TMP93]], [[SPEC_SELECT8_6]]
+; CHECK-NEXT:    [[TMP94:%.*]] = or i1 [[CMP12_1_6]], [[CMP12_6]]
+; CHECK-NEXT:    [[SPEC_SELECT8_1_6:%.*]] = select i1 [[CMP12_1_6]], i32 [[TMP93]], i32 [[SPEC_SELECT8_6]]
+; CHECK-NEXT:    [[SUB_2_6:%.*]] = sub i32 [[TMP30]], [[TMP23]]
+; CHECK-NEXT:    [[TMP95:%.*]] = icmp slt i32 [[SUB_2_6]], 0
+; CHECK-NEXT:    [[NEG_2_6:%.*]] = sub nsw i32 0, [[SUB_2_6]]
+; CHECK-NEXT:    [[TMP96:%.*]] = select i1 [[TMP95]], i32 [[NEG_2_6]], i32 [[SUB_2_6]]
+; CHECK-NEXT:    [[CMP12_2_6:%.*]] = icmp slt i32 [[TMP96]], [[SPEC_SELECT8_1_6]]
+; CHECK-NEXT:    [[TMP97:%.*]] = or i1 [[CMP12_2_6]], [[TMP94]]
+; CHECK-NEXT:    [[SPEC_SELECT8_2_6:%.*]] = select i1 [[CMP12_2_6]], i32 [[TMP96]], i32 [[SPEC_SELECT8_1_6]]
+; CHECK-NEXT:    [[SUB_3_6:%.*]] = sub i32 [[TMP30]], [[TMP24]]
+; CHECK-NEXT:    [[TMP98:%.*]] = icmp slt i32 [[SUB_3_6]], 0
+; CHECK-NEXT:    [[NEG_3_6:%.*]] = sub nsw i32 0, [[SUB_3_6]]
+; CHECK-NEXT:    [[TMP99:%.*]] = select i1 [[TMP98]], i32 [[NEG_3_6]], i32 [[SUB_3_6]]
+; CHECK-NEXT:    [[CMP12_3_6:%.*]] = icmp slt i32 [[TMP99]], [[SPEC_SELECT8_2_6]]
+; CHECK-NEXT:    [[TMP100:%.*]] = or i1 [[CMP12_3_6]], [[TMP97]]
+; CHECK-NEXT:    [[SPEC_SELECT_3_6:%.*]] = select i1 [[TMP100]], i32 6, i32 [[SPEC_SELECT_3_5]]
+; CHECK-NEXT:    [[SPEC_SELECT8_3_6:%.*]] = select i1 [[CMP12_3_6]], i32 [[TMP99]], i32 [[SPEC_SELECT8_2_6]]
+; CHECK-NEXT:    [[SUB_7:%.*]] = sub i32 [[TMP30]], [[TMP25]]
+; CHECK-NEXT:    [[TMP101:%.*]] = icmp slt i32 [[SUB_7]], 0
+; CHECK-NEXT:    [[NEG_7:%.*]] = sub nsw i32 0, [[SUB_7]]
+; CHECK-NEXT:    [[TMP102:%.*]] = select i1 [[TMP101]], i32 [[NEG_7]], i32 [[SUB_7]]
 ; CHECK-NEXT:    [[CMP12_7:%.*]] = icmp slt i32 [[TMP102]], [[SPEC_SELECT8_3_6]]
 ; CHECK-NEXT:    [[SPEC_SELECT8_7:%.*]] = select i1 [[CMP12_7]], i32 [[TMP102]], i32 [[SPEC_SELECT8_3_6]]
-; CHECK-NEXT:    [[TMP103:%.*]] = extractelement <16 x i32> [[TMP53]], i32 13
-; CHECK-NEXT:    [[NEG_1_7:%.*]] = sub nsw i32 0, [[TMP103]]
-; CHECK-NEXT:    [[TMP104:%.*]] = extractelement <16 x i1> [[TMP55]], i32 13
-; CHECK-NEXT:    [[TMP105:%.*]] = select i1 [[TMP104]], i32 [[NEG_1_7]], i32 [[TMP103]]
-; CHECK-NEXT:    [[CMP12_1_7:%.*]] = icmp slt i32 [[TMP105]], [[SPEC_SELECT8_7]]
-; CHECK-NEXT:    [[TMP106:%.*]] = or i1 [[CMP12_1_7]], [[CMP12_7]]
-; CHECK-NEXT:    [[SPEC_SELECT8_1_7:%.*]] = select i1 [[CMP12_1_7]], i32 [[TMP105]], i32 [[SPEC_SELECT8_7]]
-; CHECK-NEXT:    [[TMP107:%.*]] = extractelement <16 x i32> [[TMP53]], i32 14
-; CHECK-NEXT:    [[NEG_2_7:%.*]] = sub nsw i32 0, [[TMP107]]
-; CHECK-NEXT:    [[TMP108:%.*]] = extractelement <16 x i1> [[TMP55]], i32 14
-; CHECK-NEXT:    [[TMP109:%.*]] = select i1 [[TMP108]], i32 [[NEG_2_7]], i32 [[TMP107]]
-; CHECK-NEXT:    [[CMP12_2_7:%.*]] = icmp slt i32 [[TMP109]], [[SPEC_SELECT8_1_7]]
-; CHECK-NEXT:    [[TMP110:%.*]] = or i1 [[CMP12_2_7]], [[TMP106]]
-; CHECK-NEXT:    [[SPEC_SELECT8_2_7:%.*]] = select i1 [[CMP12_2_7]], i32 [[TMP109]], i32 [[SPEC_SELECT8_1_7]]
-; CHECK-NEXT:    [[TMP111:%.*]] = extractelement <16 x i32> [[TMP53]], i32 15
-; CHECK-NEXT:    [[NEG_3_7:%.*]] = sub nsw i32 0, [[TMP111]]
-; CHECK-NEXT:    [[TMP112:%.*]] = extractelement <16 x i1> [[TMP55]], i32 15
-; CHECK-NEXT:    [[TMP113:%.*]] = select i1 [[TMP112]], i32 [[NEG_3_7]], i32 [[TMP111]]
-; CHECK-NEXT:    [[CMP12_3_7:%.*]] = icmp slt i32 [[TMP113]], [[SPEC_SELECT8_2_7]]
-; CHECK-NEXT:    [[TMP114:%.*]] = or i1 [[CMP12_3_7]], [[TMP110]]
-; CHECK-NEXT:    [[SPEC_SELECT_3_7:%.*]] = select i1 [[TMP114]], i32 7, i32 [[SPEC_SELECT_3_6]]
-; CHECK-NEXT:    [[SPEC_SELECT8_3_7]] = select i1 [[CMP12_3_7]], i32 [[TMP113]], i32 [[SPEC_SELECT8_2_7]]
+; CHECK-NEXT:    [[SUB_1_7:%.*]] = sub i32 [[TMP30]], [[TMP26]]
+; CHECK-NEXT:    [[TMP103:%.*]] = icmp slt i32 [[SUB_1_7]], 0
+; CHECK-NEXT:    [[NEG_1_7:%.*]] = sub nsw i32 0, [[SUB_1_7]]
+; CHECK-NEXT:    [[TMP104:%.*]] = select i1 [[TMP103]], i32 [[NEG_1_7]], i32 [[SUB_1_7]]
+; CHECK-NEXT:    [[CMP12_1_7:%.*]] = icmp slt i32 [[TMP104]], [[SPEC_SELECT8_7]]
+; CHECK-NEXT:    [[TMP105:%.*]] = or i1 [[CMP12_1_7]], [[CMP12_7]]
+; CHECK-NEXT:    [[SPEC_SELECT8_1_7:%.*]] = select i1 [[CMP12_1_7]], i32 [[TMP104]], i32 [[SPEC_SELECT8_7]]
+; CHECK-NEXT:    [[SUB_2_7:%.*]] = sub i32 [[TMP30]], [[TMP27]]
+; CHECK-NEXT:    [[TMP106:%.*]] = icmp slt i32 [[SUB_2_7]], 0
+; CHECK-NEXT:    [[NEG_2_7:%.*]] = sub nsw i32 0, [[SUB_2_7]]
+; CHECK-NEXT:    [[TMP107:%.*]] = select i1 [[TMP106]], i32 [[NEG_2_7]], i32 [[SUB_2_7]]
+; CHECK-NEXT:    [[CMP12_2_7:%.*]] = icmp slt i32 [[TMP107]], [[SPEC_SELECT8_1_7]]
+; CHECK-NEXT:    [[TMP108:%.*]] = or i1 [[CMP12_2_7]], [[TMP105]]
+; CHECK-NEXT:    [[SPEC_SELECT8_2_7:%.*]] = select i1 [[CMP12_2_7]], i32 [[TMP107]], i32 [[SPEC_SELECT8_1_7]]
+; CHECK-NEXT:    [[SUB_3_7:%.*]] = sub i32 [[TMP30]], [[TMP28]]
+; CHECK-NEXT:    [[TMP109:%.*]] = icmp slt i32 [[SUB_3_7]], 0
+; CHECK-NEXT:    [[NEG_3_7:%.*]] = sub nsw i32 0, [[SUB_3_7]]
+; CHECK-NEXT:    [[TMP110:%.*]] = select i1 [[TMP109]], i32 [[NEG_3_7]], i32 [[SUB_3_7]]
+; CHECK-NEXT:    [[CMP12_3_7:%.*]] = icmp slt i32 [[TMP110]], [[SPEC_SELECT8_2_7]]
+; CHECK-NEXT:    [[TMP111:%.*]] = or i1 [[CMP12_3_7]], [[TMP108]]
+; CHECK-NEXT:    [[SPEC_SELECT_3_7:%.*]] = select i1 [[TMP111]], i32 7, i32 [[SPEC_SELECT_3_6]]
+; CHECK-NEXT:    [[SPEC_SELECT8_3_7]] = select i1 [[CMP12_3_7]], i32 [[TMP110]], i32 [[SPEC_SELECT8_2_7]]
 ; CHECK-NEXT:    [[K:%.*]] = getelementptr inbounds [366 x i32], ptr @l, i64 0, i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    store i32 [[SPEC_SELECT_3_7]], ptr [[K]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1


        


More information about the llvm-commits mailing list