[llvm] [AArch64] Add all cost kinds for getArithmeticInstrCost (PR #154381)

David Green via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 19 09:49:22 PDT 2025


https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/154381

This:
  - Adds all (CodeSize, Latency, SizeLat) to getArithmeticInstrCost.
  - Uses an Adjusted TypeLegalizationCost that accounts for the size of a vector, not the number of times it will be split.
  - Adds a base cost of 10 (Call Overhead) for all fp128 operations.
  - Cleans up the order of instructions.
  - Adds some slightly better i128 costs for multiply etc.

>From 56fff55890a6a432ea0eee589a818f489ebd2b38 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Tue, 19 Aug 2025 17:34:18 +0100
Subject: [PATCH] [AArch64] Add all cost kinds for getArithmeticInstrCost

This:
  - Adds all (CodeSize, Latency, SizeLat) to getArithmeticInstrCost.
  - Uses an Adjusted TypeLegalizationCost that accounts for the size of a
    vector, not the number of times it will be split.
  - Adds a base cost of 10 (Call Overhead) for all fp128 operations.
  - Cleans up the order of instructions.
  - Adds some slightly better i128 costs for multiply etc.
---
 .../AArch64/AArch64TargetTransformInfo.cpp    | 144 +++--
 .../AArch64/AArch64TargetTransformInfo.h      |   3 +
 llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp |   9 +-
 .../Analysis/CostModel/AArch64/arith-fp.ll    | 228 +++----
 .../CostModel/AArch64/arith-overflow.ll       | 112 ++--
 .../CostModel/AArch64/arith-widening.ll       | 356 +++++-----
 llvm/test/Analysis/CostModel/AArch64/arith.ll | 160 +++--
 llvm/test/Analysis/CostModel/AArch64/cmp.ll   |   2 +-
 .../Analysis/CostModel/AArch64/cttz_elts.ll   |  32 +-
 llvm/test/Analysis/CostModel/AArch64/div.ll   | 608 +++++++++---------
 .../Analysis/CostModel/AArch64/div_cte.ll     |  12 +-
 llvm/test/Analysis/CostModel/AArch64/fshl.ll  |   8 +-
 llvm/test/Analysis/CostModel/AArch64/fshr.ll  |   8 +-
 llvm/test/Analysis/CostModel/AArch64/mul.ll   |  10 +-
 .../Analysis/CostModel/AArch64/reduce-fadd.ll |  72 +--
 llvm/test/Analysis/CostModel/AArch64/rem.ll   | 452 ++++++-------
 .../CostModel/AArch64/sve-arith-fp.ll         |  52 +-
 .../Analysis/CostModel/AArch64/sve-arith.ll   |   8 +-
 .../Analysis/CostModel/AArch64/sve-div.ll     | 428 ++++++------
 .../Analysis/CostModel/AArch64/sve-rem.ll     | 372 +++++------
 .../Analysis/CostModel/AArch64/vec3-ops.ll    |   4 +-
 .../LoopVectorize/AArch64/optsize_minsize.ll  |  54 +-
 22 files changed, 1606 insertions(+), 1528 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index b021968bfd8ac..600c4c66a8ca4 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -4036,6 +4036,20 @@ std::optional<InstructionCost> AArch64TTIImpl::getFP16BF16PromoteCost(
   return Cost;
 }
 
+std::pair<InstructionCost, MVT>
+AArch64TTIImpl::getAdjustedTypeLegalizationCost(Type *Ty) const {
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
+  if (LT.first > 1 && Ty->isVectorTy() && LT.second.isVector() &&
+      Ty->isScalableTy() == LT.second.isScalableVector() &&
+      LT.second.getVectorMinNumElements() <
+          cast<VectorType>(Ty)->getElementCount().getKnownMinValue())
+    return {
+        divideCeil(cast<VectorType>(Ty)->getElementCount().getKnownMinValue(),
+                   LT.second.getVectorMinNumElements()),
+        LT.second};
+  return LT;
+}
+
 InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
     unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
     TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
@@ -4049,19 +4063,21 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
     if (VTy->getElementCount() == ElementCount::getScalable(1))
       return InstructionCost::getInvalid();
 
-  // TODO: Handle more cost kinds.
-  if (CostKind != TTI::TCK_RecipThroughput)
-    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
-                                         Op2Info, Args, CxtI);
-
   // Legalize the type.
-  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
+  std::pair<InstructionCost, MVT> LT = getAdjustedTypeLegalizationCost(Ty);
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
+  assert(ISD == ISD::ADD || ISD == ISD::SUB || ISD == ISD::MUL ||
+         ISD == ISD::SDIV || ISD == ISD::SREM || ISD == ISD::UDIV ||
+         ISD == ISD::UREM || ISD == ISD::AND || ISD == ISD::OR ||
+         ISD == ISD::XOR || ISD == ISD::SRL || ISD == ISD::SRA ||
+         ISD == ISD::SHL || ISD == ISD::FADD || ISD == ISD::FSUB ||
+         ISD == ISD::FMUL || ISD == ISD::FDIV || ISD == ISD::FREM ||
+         ISD == ISD::FNEG);
 
   // Increase the cost for half and bfloat types if not architecturally
   // supported.
   if (ISD == ISD::FADD || ISD == ISD::FSUB || ISD == ISD::FMUL ||
-      ISD == ISD::FDIV || ISD == ISD::FREM)
+      ISD == ISD::FDIV || ISD == ISD::FREM) {
     if (auto PromotedCost = getFP16BF16PromoteCost(
             Ty, CostKind, Op1Info, Op2Info, /*IncludeTrunc=*/true,
             [&](Type *PromotedTy) {
@@ -4070,10 +4086,46 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
             }))
       return *PromotedCost;
 
+    // fp128 all go via libcalls
+    if (Ty->getScalarType()->isFP128Ty())
+      return CostKind == TTI::TCK_CodeSize ? 1 : 10 * LT.first;
+  }
+
   switch (ISD) {
-  default:
-    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
-                                         Op2Info);
+  case ISD::ADD:
+  case ISD::SUB:
+    return LT.first; // Works OK for i128 too as it requires adds+adc.
+  case ISD::MUL:
+    // A v2i64 multiply is not legal for Neon.
+    // When SVE is available, then we can lower the v2i64 operation using
+    // the SVE mul instruction, which has a lower cost.
+
+    // There is no MUL.2d instruction, which means mul <2 x i64> is expensive as
+    // elements are extracted from the vectors and the muls scalarized. We
+    // estimate the cost for a i64 vector directly here, which is:
+    // - four 2-cost i64 extracts,
+    // - two 2-cost i64 inserts, and
+    // - two 1-cost muls.
+    // So, for a v2i64 with LT.First = 1 the cost is 14, and for a v4i64 with
+    // LT.first = 2 the cost is 28. If both operands are extensions it will not
+    // need to scalarize so the cost can be cheaper (smull or umull), and SVE
+    // has a native instruction we can use for v2i64.
+    if (LT.second == MVT::v2i64 && !ST->hasSVE() &&
+        !isWideningInstruction(Ty, Opcode, Args)) {
+      return cast<VectorType>(Ty)->getElementCount().getKnownMinValue() *
+             (getArithmeticInstrCost(Opcode, Ty->getScalarType(), CostKind) +
+              getVectorInstrCost(Instruction::ExtractElement, Ty, CostKind, -1,
+                                 nullptr, nullptr) *
+                  2 +
+              getVectorInstrCost(Instruction::InsertElement, Ty, CostKind, -1,
+                                 nullptr, nullptr));
+    }
+    // i128 multiply is umulh + 2*madd + mul.
+    if (Ty->getScalarSizeInBits() == 128)
+      return 2 * LT.first;
+    if (Ty->getScalarSizeInBits() <= 64)
+      return LT.first;
+    break;
   case ISD::SREM:
   case ISD::SDIV:
     /*
@@ -4288,44 +4340,28 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
     }
     return Cost;
   }
-  case ISD::MUL:
-    // When SVE is available, then we can lower the v2i64 operation using
-    // the SVE mul instruction, which has a lower cost.
-    if (LT.second == MVT::v2i64 && ST->hasSVE())
-      return LT.first;
-
-    // When SVE is not available, there is no MUL.2d instruction,
-    // which means mul <2 x i64> is expensive as elements are extracted
-    // from the vectors and the muls scalarized.
-    // As getScalarizationOverhead is a bit too pessimistic, we
-    // estimate the cost for a i64 vector directly here, which is:
-    // - four 2-cost i64 extracts,
-    // - two 2-cost i64 inserts, and
-    // - two 1-cost muls.
-    // So, for a v2i64 with LT.First = 1 the cost is 14, and for a v4i64 with
-    // LT.first = 2 the cost is 28. If both operands are extensions it will not
-    // need to scalarize so the cost can be cheaper (smull or umull).
-    // so the cost can be cheaper (smull or umull).
-    if (LT.second != MVT::v2i64 || isWideningInstruction(Ty, Opcode, Args))
-      return LT.first;
-    return cast<VectorType>(Ty)->getElementCount().getKnownMinValue() *
-           (getArithmeticInstrCost(Opcode, Ty->getScalarType(), CostKind) +
-            getVectorInstrCost(Instruction::ExtractElement, Ty, CostKind, -1,
-                               nullptr, nullptr) *
-                2 +
-            getVectorInstrCost(Instruction::InsertElement, Ty, CostKind, -1,
-                               nullptr, nullptr));
-  case ISD::ADD:
-  case ISD::XOR:
-  case ISD::OR:
   case ISD::AND:
-  case ISD::SRL:
+  case ISD::OR:
+  case ISD::XOR:
+    return LT.first;
+
   case ISD::SRA:
+  case ISD::SRL:
   case ISD::SHL:
-    // These nodes are marked as 'custom' for combining purposes only.
-    // We know that they are legal. See LowerAdd in ISelLowering.
-    return LT.first;
+    return LT.first; // TODO: i128. Vectors could be better?
 
+  case ISD::FADD:
+  case ISD::FSUB:
+    return (CostKind == TTI::TCK_Latency ? 3 : 1) * LT.first;
+  case ISD::FMUL:
+    // TODO: Make this Cleaner?
+    return (CostKind == TTI::TCK_Latency
+                ? 3
+                : (CostKind == TTI::TCK_RecipThroughput ? 2 : 1)) *
+           LT.first;
+  case ISD::FDIV:
+    // TODO: Better numbers?
+    return (CostKind == TTI::TCK_CodeSize ? 1 : 4) * LT.first;
   case ISD::FNEG:
     // Scalar fmul(fneg) or fneg(fmul) can be converted to fnmul
     if ((Ty->isFloatTy() || Ty->isDoubleTy() ||
@@ -4335,29 +4371,17 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
           match(*CxtI->user_begin(), m_FMul(m_Value(), m_Value()))) ||
          match(CxtI->getOperand(0), m_FMul(m_Value(), m_Value()))))
       return 0;
-    [[fallthrough]];
-  case ISD::FADD:
-  case ISD::FSUB:
-    if (!Ty->getScalarType()->isFP128Ty())
-      return LT.first;
-    [[fallthrough]];
-  case ISD::FMUL:
-  case ISD::FDIV:
-    // These nodes are marked as 'custom' just to lower them to SVE.
-    // We know said lowering will incur no additional cost.
-    if (!Ty->getScalarType()->isFP128Ty())
-      return 2 * LT.first;
-
-    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
-                                         Op2Info);
+    return (CostKind == TTI::TCK_Latency ? 3 : 1) * LT.first;
   case ISD::FREM:
     // Pass nullptr as fmod/fmodf calls are emitted by the backend even when
     // those functions are not declared in the module.
-    if (!Ty->isVectorTy())
+    if (!Ty->isVectorTy() && CostKind != TTI::TCK_CodeSize)
       return getCallInstrCost(/*Function*/ nullptr, Ty, {Ty, Ty}, CostKind);
     return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                          Op2Info);
   }
+
+  return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info);
 }
 
 InstructionCost
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 42ae962b3b426..2c70d6cb1f142 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -441,6 +441,9 @@ class AArch64TTIImpl final : public BasicTTIImplBase<AArch64TTIImpl> {
 
   bool preferPredicatedReductionSelect() const override { return ST->hasSVE(); }
 
+  std::pair<InstructionCost, MVT>
+  getAdjustedTypeLegalizationCost(Type *Ty) const;
+
   /// FP16 and BF16 operations are lowered to fptrunc(op(fpext, fpext) if the
   /// architecture features are not present.
   std::optional<InstructionCost>
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index f7d2258e1c283..d40ca374daad8 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -461,10 +461,11 @@ static std::optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
                         return Res;
                       return Op;
                     });
-          UnrolledCost += TTI.getInstructionCost(I, Operands, CostKind);
-          LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration "
-                            << Iteration << "): ");
-          LLVM_DEBUG(I->dump());
+          InstructionCost Cost = TTI.getInstructionCost(I, Operands, CostKind);
+          LLVM_DEBUG(dbgs()
+                     << "Adding cost " << Cost << " of instruction (iteration "
+                     << Iteration << "): " << *I << "\n");
+          UnrolledCost += Cost;
         }
 
         // We must count the cost of every operand which is not free,
diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
index c208d03ff94b7..781dbcaeda48a 100644
--- a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
@@ -9,12 +9,12 @@ define void @fadd() {
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %F32 = fadd float undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = fadd <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = fadd <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = fadd <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V8F32 = fadd <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %F64 = fadd double undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fadd <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fadd <4 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %F128 = fadd fp128 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:3 SizeLat:1 for: %V2F129 = fadd <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V4F64 = fadd <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %F128 = fadd fp128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:20 SizeLat:20 for: %V2F129 = fadd <2 x fp128> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %F32 = fadd float undef, undef
@@ -34,17 +34,17 @@ define void @fadd() {
 
 define void @fadd_fp16() {
 ; CHECK-BASE-LABEL: 'fadd_fp16'
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fadd half undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fadd <4 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fadd <8 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fadd <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:6 SizeLat:4 for: %F16 = fadd half undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:6 SizeLat:4 for: %V4F16 = fadd <4 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:5 Lat:9 SizeLat:5 for: %V8F16 = fadd <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:7 Lat:15 SizeLat:7 for: %V16F16 = fadd <16 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-FP16-LABEL: 'fadd_fp16'
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fadd half undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fadd <4 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fadd <8 x half> undef, undef
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fadd <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V16F16 = fadd <16 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %F16 = fadd half undef, undef
@@ -59,12 +59,12 @@ define void @fsub() {
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %F32 = fsub float undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = fsub <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = fsub <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = fsub <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V8F32 = fsub <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %F64 = fsub double undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fsub <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fsub <4 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %F128 = fsub fp128 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:3 SizeLat:1 for: %V2F129 = fsub <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V4F64 = fsub <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %F128 = fsub fp128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:20 SizeLat:20 for: %V2F129 = fsub <2 x fp128> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %F32 = fsub float undef, undef
@@ -84,17 +84,17 @@ define void @fsub() {
 
 define void @fsub_fp16() {
 ; CHECK-BASE-LABEL: 'fsub_fp16'
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fsub half undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fsub <4 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fsub <8 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fsub <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:6 SizeLat:4 for: %F16 = fsub half undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:6 SizeLat:4 for: %V4F16 = fsub <4 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:5 Lat:9 SizeLat:5 for: %V8F16 = fsub <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:7 Lat:15 SizeLat:7 for: %V16F16 = fsub <16 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-FP16-LABEL: 'fsub_fp16'
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fsub half undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fsub <4 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fsub <8 x half> undef, undef
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fsub <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V16F16 = fsub <16 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %F16 = fsub half undef, undef
@@ -109,12 +109,12 @@ define void @fneg_idiom() {
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %F32 = fsub float -0.000000e+00, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = fsub <2 x float> splat (float -0.000000e+00), undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %F64 = fsub double -0.000000e+00, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %F128 = fsub fp128 0xL00000000000000008000000000000000, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:3 SizeLat:1 for: %V2F129 = fsub <2 x fp128> splat (fp128 0xL00000000000000008000000000000000), undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %F128 = fsub fp128 0xL00000000000000008000000000000000, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:20 SizeLat:20 for: %V2F129 = fsub <2 x fp128> splat (fp128 0xL00000000000000008000000000000000), undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %F32 = fsub float -0.0, undef
@@ -134,9 +134,9 @@ define void @fneg_idiom() {
 
 define void @fneg_idiom_fp16() {
 ; CHECK-BASE-LABEL: 'fneg_idiom_fp16'
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fsub half 0xH8000, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fsub <4 x half> splat (half 0xH8000), undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fsub <8 x half> splat (half 0xH8000), undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:5 SizeLat:3 for: %F16 = fsub half 0xH8000, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:5 SizeLat:3 for: %V4F16 = fsub <4 x half> splat (half 0xH8000), undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:8 SizeLat:4 for: %V8F16 = fsub <8 x half> splat (half 0xH8000), undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-FP16-LABEL: 'fneg_idiom_fp16'
@@ -156,12 +156,12 @@ define void @fneg() {
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %F32 = fneg float undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = fneg <2 x float> undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = fneg <4 x float> undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = fneg <8 x float> undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V8F32 = fneg <8 x float> undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %F64 = fneg double undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fneg <2 x double> undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fneg <4 x double> undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F128 = fneg fp128 undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V2F129 = fneg <2 x fp128> undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V4F64 = fneg <4 x double> undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %F128 = fneg fp128 undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V2F129 = fneg <2 x fp128> undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %F32 = fneg float undef
@@ -185,7 +185,7 @@ define void @fneg_fp16() {
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16 = fneg <2 x half> undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fneg <4 x half> undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fneg <8 x half> undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fneg <16 x half> undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V16F16 = fneg <16 x half> undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %F16 = fneg half undef
@@ -198,24 +198,24 @@ define void @fneg_fp16() {
 
 define void @fmulfneg() {
 ; CHECK-LABEL: 'fmulfneg'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:3 SizeLat:1 for: %F32 = fneg float undef
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %F32 = fneg float undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F32M = fmul float %F32, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = fneg <2 x float> undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32M = fmul <2 x float> %V2F32, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = fneg <4 x float> undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32M = fmul <4 x float> %V4F32, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = fneg <8 x float> undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32M = fmul <8 x float> %V8F32, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:3 SizeLat:1 for: %F64 = fneg double undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V8F32 = fneg <8 x float> undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %V8F32M = fmul <8 x float> %V8F32, undef
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %F64 = fneg double undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F64M = fmul double %F64, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fneg <2 x double> undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64M = fmul <2 x double> %V2F64, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fneg <4 x double> undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64M = fmul <4 x double> %V4F64, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F128 = fneg fp128 undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %F128M = fmul fp128 %F128, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V2F128 = fneg <2 x fp128> undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:3 SizeLat:1 for: %V2F128M = fmul <2 x fp128> %V2F128, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V4F64 = fneg <4 x double> undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %V4F64M = fmul <4 x double> %V4F64, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %F128 = fneg fp128 undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %F128M = fmul fp128 %F128, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V2F128 = fneg <2 x fp128> undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:20 SizeLat:20 for: %V2F128M = fmul <2 x fp128> %V2F128, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %F32 = fneg float undef
@@ -245,19 +245,19 @@ define void @fmulfneg() {
 define void @fmulneg_fp16() {
 ; CHECK-BASE-LABEL: 'fmulneg_fp16'
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fneg half undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:3 SizeLat:1 for: %F16M = fmul half %F16, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:6 SizeLat:4 for: %F16M = fmul half %F16, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16 = fneg <2 x half> undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16M = fmul <2 x half> %V2F16, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:6 SizeLat:4 for: %V2F16M = fmul <2 x half> %V2F16, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fneg <4 x half> undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16M = fmul <4 x half> %V4F16, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:6 SizeLat:4 for: %V4F16M = fmul <4 x half> %V4F16, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fneg <8 x half> undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16M = fmul <8 x half> %V8F16, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fneg <16 x half> undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16M = fmul <16 x half> %V16F16, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:5 Lat:9 SizeLat:5 for: %V8F16M = fmul <8 x half> %V8F16, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V16F16 = fneg <16 x half> undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:7 Lat:15 SizeLat:7 for: %V16F16M = fmul <16 x half> %V16F16, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-FP16-LABEL: 'fmulneg_fp16'
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fneg half undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 0 for: %F16 = fneg half undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F16M = fmul half %F16, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16 = fneg <2 x half> undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16M = fmul <2 x half> %V2F16, undef
@@ -265,8 +265,8 @@ define void @fmulneg_fp16() {
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16M = fmul <4 x half> %V4F16, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fneg <8 x half> undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16M = fmul <8 x half> %V8F16, undef
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fneg <16 x half> undef
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16M = fmul <16 x half> %V16F16, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V16F16 = fneg <16 x half> undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %V16F16M = fmul <16 x half> %V16F16, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %F16 = fneg half undef
@@ -285,23 +285,23 @@ define void @fmulneg_fp16() {
 define void @fnegfmul() {
 ; CHECK-LABEL: 'fnegfmul'
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F32M = fmul float undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:3 SizeLat:1 for: %F32 = fneg float %F32M
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %F32 = fneg float %F32M
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32M = fmul <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = fneg <2 x float> %V2F32M
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32M = fmul <4 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = fneg <4 x float> %V4F32M
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32M = fmul <8 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = fneg <8 x float> %V8F32M
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %V8F32M = fmul <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V8F32 = fneg <8 x float> %V8F32M
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F64M = fmul double undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:3 SizeLat:1 for: %F64 = fneg double %F64M
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %F64 = fneg double %F64M
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64M = fmul <2 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fneg <2 x double> %V2F64M
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64M = fmul <4 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fneg <4 x double> %V4F64M
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %F128M = fmul fp128 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F128 = fneg fp128 %F128M
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:3 SizeLat:1 for: %V2F128M = fmul <2 x fp128> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V2F128 = fneg <2 x fp128> %V2F128M
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %V4F64M = fmul <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V4F64 = fneg <4 x double> %V4F64M
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %F128M = fmul fp128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %F128 = fneg fp128 %F128M
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:20 SizeLat:20 for: %V2F128M = fmul <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V2F128 = fneg <2 x fp128> %V2F128M
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %F32M = fmul float undef, undef
@@ -330,29 +330,29 @@ define void @fnegfmul() {
 
 define void @fnegfmul_fp16() {
 ; CHECK-BASE-LABEL: 'fnegfmul_fp16'
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:3 SizeLat:1 for: %F16M = fmul half undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:6 SizeLat:4 for: %F16M = fmul half undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fneg half %F16M
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16M = fmul <2 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:6 SizeLat:4 for: %V2F16M = fmul <2 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16 = fneg <2 x half> %V2F16M
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16M = fmul <4 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:6 SizeLat:4 for: %V4F16M = fmul <4 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fneg <4 x half> %V4F16M
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16M = fmul <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:5 Lat:9 SizeLat:5 for: %V8F16M = fmul <8 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fneg <8 x half> %V8F16M
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16M = fmul <16 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fneg <16 x half> %V16F16M
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:7 Lat:15 SizeLat:7 for: %V16F16M = fmul <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V16F16 = fneg <16 x half> %V16F16M
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-FP16-LABEL: 'fnegfmul_fp16'
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F16M = fmul half undef, undef
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fneg half %F16M
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 0 for: %F16 = fneg half %F16M
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16M = fmul <2 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16 = fneg <2 x half> %V2F16M
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16M = fmul <4 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fneg <4 x half> %V4F16M
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16M = fmul <8 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fneg <8 x half> %V8F16M
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16M = fmul <16 x half> undef, undef
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fneg <16 x half> %V16F16M
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %V16F16M = fmul <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V16F16 = fneg <16 x half> %V16F16M
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %F16M = fmul half undef, undef
@@ -373,12 +373,12 @@ define void @fmul() {
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F32 = fmul float undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = fmul <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = fmul <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = fmul <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %V8F32 = fmul <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F64 = fmul double undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fmul <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fmul <4 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %F128 = fmul fp128 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:3 SizeLat:1 for: %V2F128 = fmul <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %V4F64 = fmul <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %F128 = fmul fp128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:20 SizeLat:20 for: %V2F128 = fmul <2 x fp128> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %F32 = fmul float undef, undef
@@ -398,17 +398,17 @@ define void @fmul() {
 
 define void @fmul_fp16() {
 ; CHECK-BASE-LABEL: 'fmul_fp16'
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fmul half undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fmul <4 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fmul <8 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fmul <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:6 SizeLat:4 for: %F16 = fmul half undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:6 SizeLat:4 for: %V4F16 = fmul <4 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:5 Lat:9 SizeLat:5 for: %V8F16 = fmul <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:7 Lat:15 SizeLat:7 for: %V16F16 = fmul <16 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-FP16-LABEL: 'fmul_fp16'
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fmul half undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fmul <4 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fmul <8 x half> undef, undef
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fmul <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %V16F16 = fmul <16 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %F16 = fmul half undef, undef
@@ -420,15 +420,15 @@ define void @fmul_fp16() {
 
 define void @fdiv() {
 ; CHECK-LABEL: 'fdiv'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F32 = fdiv float undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V2F32 = fdiv <2 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = fdiv <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8F32 = fdiv <8 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F64 = fdiv double undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = fdiv <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4F64 = fdiv <4 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %F128 = fdiv fp128 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V2F128 = fdiv <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %F32 = fdiv float undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %V2F32 = fdiv <2 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %V4F32 = fdiv <4 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:2 Lat:8 SizeLat:8 for: %V8F32 = fdiv <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %F64 = fdiv double undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %V2F64 = fdiv <2 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:2 Lat:8 SizeLat:8 for: %V4F64 = fdiv <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %F128 = fdiv fp128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:20 SizeLat:20 for: %V2F128 = fdiv <2 x fp128> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %F32 = fdiv float undef, undef
@@ -448,17 +448,17 @@ define void @fdiv() {
 
 define void @fdiv_fp16() {
 ; CHECK-BASE-LABEL: 'fdiv_fp16'
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %F16 = fdiv half undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V4F16 = fdiv <4 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V8F16 = fdiv <8 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %V16F16 = fdiv <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:7 SizeLat:7 for: %F16 = fdiv half undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:7 SizeLat:7 for: %V4F16 = fdiv <4 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:5 Lat:11 SizeLat:11 for: %V8F16 = fdiv <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:7 Lat:19 SizeLat:19 for: %V16F16 = fdiv <16 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-FP16-LABEL: 'fdiv_fp16'
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F16 = fdiv half undef, undef
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V4F16 = fdiv <4 x half> undef, undef
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V8F16 = fdiv <8 x half> undef, undef
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 4 for: %V16F16 = fdiv <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %F16 = fdiv half undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %V4F16 = fdiv <4 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %V8F16 = fdiv <8 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:2 Lat:8 SizeLat:8 for: %V16F16 = fdiv <16 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %F16 = fdiv half undef, undef
@@ -470,15 +470,15 @@ define void @fdiv_fp16() {
 
 define void @frem() {
 ; CHECK-LABEL: 'frem'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %F32 = frem float undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:10 SizeLat:10 for: %F32 = frem float undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V2F32 = frem <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:52 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:104 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %F64 = frem double undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:10 SizeLat:10 for: %F64 = frem double undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %F128 = frem fp128 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %V2F128 = frem <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %F128 = frem fp128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:20 SizeLat:20 for: %V2F128 = frem <2 x fp128> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %F32 = frem float undef, undef
@@ -498,14 +498,14 @@ define void @frem() {
 
 define void @frem_fp16() {
 ; CHECK-BASE-LABEL: 'frem_fp16'
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:13 CodeSize:4 Lat:4 SizeLat:4 for: %F16 = frem half undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:55 CodeSize:4 Lat:4 SizeLat:4 for: %V4F16 = frem <4 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:110 CodeSize:4 Lat:4 SizeLat:4 for: %V8F16 = frem <8 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:220 CodeSize:4 Lat:4 SizeLat:4 for: %V16F16 = frem <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:13 CodeSize:7 Lat:13 SizeLat:13 for: %F16 = frem half undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:55 CodeSize:7 Lat:7 SizeLat:7 for: %V4F16 = frem <4 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:110 CodeSize:7 Lat:7 SizeLat:7 for: %V8F16 = frem <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:220 CodeSize:7 Lat:7 SizeLat:7 for: %V16F16 = frem <16 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-FP16-LABEL: 'frem_fp16'
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %F16 = frem half undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:10 SizeLat:10 for: %F16 = frem half undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:52 CodeSize:4 Lat:4 SizeLat:4 for: %V4F16 = frem <4 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:108 CodeSize:4 Lat:4 SizeLat:4 for: %V8F16 = frem <8 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:216 CodeSize:4 Lat:4 SizeLat:4 for: %V16F16 = frem <16 x half> undef, undef
@@ -666,10 +666,10 @@ define void @fma() {
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %F128 = call fp128 @llvm.fma.f128(fp128 undef, fp128 undef, fp128 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:2 Lat:20 SizeLat:20 for: %V2F128 = call <2 x fp128> @llvm.fma.v2f128(<2 x fp128> undef, <2 x fp128> undef, <2 x fp128> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
@@ -701,7 +701,7 @@ define void @fma_fp16() {
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = call half @llvm.fma.f16(half undef, half undef, half undef)
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = call <8 x half> @llvm.fma.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = call <16 x half> @llvm.fma.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %V16F16 = call <16 x half> @llvm.fma.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %F16 = call half @llvm.fma.f16(half undef, half undef, half undef)
@@ -716,12 +716,12 @@ define void @fmuladd() {
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F32 = call float @llvm.fmuladd.f32(float undef, float undef, float undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %V8F32 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F64 = call double @llvm.fmuladd.f64(double undef, double undef, double undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:2 Lat:6 SizeLat:2 for: %F128 = call fp128 @llvm.fmuladd.f128(fp128 undef, fp128 undef, fp128 undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:2 Lat:6 SizeLat:2 for: %V2F128 = call <2 x fp128> @llvm.fmuladd.v2f128(<2 x fp128> undef, <2 x fp128> undef, <2 x fp128> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %V4F64 = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:2 Lat:20 SizeLat:20 for: %F128 = call fp128 @llvm.fmuladd.f128(fp128 undef, fp128 undef, fp128 undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:2 Lat:40 SizeLat:40 for: %V2F128 = call <2 x fp128> @llvm.fmuladd.v2f128(<2 x fp128> undef, <2 x fp128> undef, <2 x fp128> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %F32 = call float @llvm.fmuladd.f32(float undef, float undef, float undef)
@@ -742,16 +742,16 @@ define void @fmuladd() {
 define void @fmuladd_fp16() {
 ; CHECK-BASE-LABEL: 'fmuladd_fp16'
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 1 for: %F16 = call half @llvm.fmuladd.f16(half undef, half undef, half undef)
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:2 Lat:6 SizeLat:2 for: %V4F16 = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:2 Lat:6 SizeLat:2 for: %V8F16 = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:36 CodeSize:2 Lat:6 SizeLat:2 for: %V16F16 = call <16 x half> @llvm.fmuladd.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:8 Lat:12 SizeLat:8 for: %V4F16 = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:10 Lat:18 SizeLat:10 for: %V8F16 = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:36 CodeSize:14 Lat:30 SizeLat:14 for: %V16F16 = call <16 x half> @llvm.fmuladd.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-FP16-LABEL: 'fmuladd_fp16'
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = call half @llvm.fmuladd.f16(half undef, half undef, half undef)
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = call <16 x half> @llvm.fmuladd.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %V16F16 = call <16 x half> @llvm.fmuladd.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %F16 = call half @llvm.fmuladd.f32(half undef, half undef, half undef)
diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-overflow.ll b/llvm/test/Analysis/CostModel/AArch64/arith-overflow.ll
index 3fbd12ac8813c..7337dbe3b492a 100644
--- a/llvm/test/Analysis/CostModel/AArch64/arith-overflow.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/arith-overflow.ll
@@ -27,20 +27,20 @@ define i32 @sadd(i32 %arg) {
 ; CHECK-LABEL: 'sadd'
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:6 SizeLat:6 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:13 CodeSize:10 Lat:10 SizeLat:10 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 13 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:6 SizeLat:6 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:13 CodeSize:10 Lat:10 SizeLat:10 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 13 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:6 SizeLat:6 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:10 Lat:10 SizeLat:10 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 14 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:6 Lat:6 SizeLat:6 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:10 Lat:10 SizeLat:10 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %I64 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
@@ -90,20 +90,20 @@ define i32 @uadd(i32 %arg) {
 ; CHECK-LABEL: 'uadd'
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %I64 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
@@ -153,20 +153,20 @@ define i32 @ssub(i32 %arg) {
 ; CHECK-LABEL: 'ssub'
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:6 SizeLat:6 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:13 CodeSize:10 Lat:10 SizeLat:10 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 13 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:6 SizeLat:6 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:13 CodeSize:10 Lat:10 SizeLat:10 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 13 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:6 SizeLat:6 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:10 Lat:10 SizeLat:10 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 14 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:6 Lat:6 SizeLat:6 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:10 Lat:10 SizeLat:10 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %I64 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
@@ -216,20 +216,20 @@ define i32 @usub(i32 %arg) {
 ; CHECK-LABEL: 'usub'
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %I64 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
@@ -278,21 +278,21 @@ declare {<64 x i8>, <64 x i1>}  @llvm.smul.with.overflow.v64i8(<64 x i8>, <64 x
 define i32 @smul(i32 %arg) {
 ; CHECK-LABEL: 'smul'
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:8 Lat:8 SizeLat:8 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:68 CodeSize:9 Lat:9 SizeLat:9 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:136 CodeSize:11 Lat:11 SizeLat:11 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:18 Lat:18 SizeLat:18 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:32 Lat:32 SizeLat:32 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:152 CodeSize:60 Lat:60 SizeLat:60 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 2 for: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:8 Lat:8 SizeLat:8 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:9 Lat:9 SizeLat:9 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:152 CodeSize:11 Lat:11 SizeLat:11 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:24 Lat:36 SizeLat:36 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:44 Lat:68 SizeLat:68 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:152 CodeSize:84 Lat:132 SizeLat:132 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 5 for: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:8 Lat:8 SizeLat:8 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:9 Lat:9 SizeLat:9 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:11 Lat:11 SizeLat:11 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:10 Lat:10 SizeLat:10 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:16 Lat:16 SizeLat:16 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:28 Lat:28 SizeLat:28 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 5 for: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:8 Lat:8 SizeLat:8 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:9 Lat:9 SizeLat:9 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:11 Lat:11 SizeLat:11 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:10 Lat:10 SizeLat:10 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:16 Lat:16 SizeLat:16 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:28 Lat:28 SizeLat:28 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %I64 = call {i64, i1} @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
@@ -341,21 +341,21 @@ declare {<64 x i8>, <64 x i1>}  @llvm.umul.with.overflow.v64i8(<64 x i8>, <64 x
 define i32 @umul(i32 %arg) {
 ; CHECK-LABEL: 'umul'
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:33 CodeSize:7 Lat:7 SizeLat:7 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:66 CodeSize:8 Lat:8 SizeLat:8 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:132 CodeSize:10 Lat:10 SizeLat:10 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:37 CodeSize:17 Lat:17 SizeLat:17 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:74 CodeSize:30 Lat:30 SizeLat:30 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:148 CodeSize:56 Lat:56 SizeLat:56 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 2 for: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:37 CodeSize:7 Lat:7 SizeLat:7 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:74 CodeSize:8 Lat:8 SizeLat:8 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:148 CodeSize:10 Lat:10 SizeLat:10 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:37 CodeSize:23 Lat:35 SizeLat:35 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:74 CodeSize:42 Lat:66 SizeLat:66 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:148 CodeSize:80 Lat:128 SizeLat:128 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:7 Lat:7 SizeLat:7 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:8 Lat:8 SizeLat:8 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:10 Lat:10 SizeLat:10 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:9 Lat:9 SizeLat:9 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:14 Lat:14 SizeLat:14 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:24 Lat:24 SizeLat:24 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:7 Lat:7 SizeLat:7 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:8 Lat:8 SizeLat:8 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:10 Lat:10 SizeLat:10 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:9 Lat:9 SizeLat:9 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:14 Lat:14 SizeLat:14 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:24 Lat:24 SizeLat:24 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %I64 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-widening.ll b/llvm/test/Analysis/CostModel/AArch64/arith-widening.ll
index 7e1588f427be4..15fea84fa8685 100644
--- a/llvm/test/Analysis/CostModel/AArch64/arith-widening.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/arith-widening.ll
@@ -304,15 +304,15 @@ define void @extaddv4(<4 x i8> %i8, <4 x i16> %i16, <4 x i32> %i32, <4 x i64> %i
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %zl2_8_32 = zext <4 x i8> %i8 to <4 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %azl_8_32 = add <4 x i32> %zl1_8_32, %zl2_8_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sw_8_64 = sext <4 x i8> %i8 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_64 = add <4 x i64> %i64, %sw_8_64
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %asw_8_64 = add <4 x i64> %i64, %sw_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_8_64 = sext <4 x i8> %i8 to <4 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_8_64 = sext <4 x i8> %i8 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_64 = add <4 x i64> %sl1_8_64, %sl2_8_64
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %asl_8_64 = add <4 x i64> %sl1_8_64, %sl2_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zw_8_64 = zext <4 x i8> %i8 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_64 = add <4 x i64> %i64, %zw_8_64
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %azw_8_64 = add <4 x i64> %i64, %zw_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_8_64 = zext <4 x i8> %i8 to <4 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_8_64 = zext <4 x i8> %i8 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_64 = add <4 x i64> %zl1_8_64, %zl2_8_64
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %azl_8_64 = add <4 x i64> %zl1_8_64, %zl2_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sw_16_32 = sext <4 x i16> %i16 to <4 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %asw_16_32 = add <4 x i32> %i32, %sw_16_32
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl1_16_32 = sext <4 x i16> %i16 to <4 x i32>
@@ -324,25 +324,25 @@ define void @extaddv4(<4 x i8> %i8, <4 x i16> %i16, <4 x i32> %i32, <4 x i64> %i
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl2_16_32 = zext <4 x i16> %i16 to <4 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %azl_16_32 = add <4 x i32> %zl1_16_32, %zl2_16_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sw_16_64 = sext <4 x i16> %i16 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asw_16_64 = add <4 x i64> %i64, %sw_16_64
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %asw_16_64 = add <4 x i64> %i64, %sw_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_16_64 = sext <4 x i16> %i16 to <4 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_16_64 = sext <4 x i16> %i16 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_64 = add <4 x i64> %sl1_16_64, %sl2_16_64
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %asl_16_64 = add <4 x i64> %sl1_16_64, %sl2_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zw_16_64 = zext <4 x i16> %i16 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azw_16_64 = add <4 x i64> %i64, %zw_16_64
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %azw_16_64 = add <4 x i64> %i64, %zw_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_16_64 = zext <4 x i16> %i16 to <4 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_16_64 = zext <4 x i16> %i16 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_64 = add <4 x i64> %zl1_16_64, %zl2_16_64
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %azl_16_64 = add <4 x i64> %zl1_16_64, %zl2_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sw_32_64 = sext <4 x i32> %i32 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asw_32_64 = add <4 x i64> %i64, %sw_32_64
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %asw_32_64 = add <4 x i64> %i64, %sw_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl1_32_64 = sext <4 x i32> %i32 to <4 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl2_32_64 = sext <4 x i32> %i32 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asl_32_64 = add <4 x i64> %sl1_32_64, %sl2_32_64
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %asl_32_64 = add <4 x i64> %sl1_32_64, %sl2_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zw_32_64 = zext <4 x i32> %i32 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azw_32_64 = add <4 x i64> %i64, %zw_32_64
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %azw_32_64 = add <4 x i64> %i64, %zw_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl1_32_64 = zext <4 x i32> %i32 to <4 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl2_32_64 = zext <4 x i32> %i32 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azl_32_64 = add <4 x i64> %zl1_32_64, %zl2_32_64
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %azl_32_64 = add <4 x i64> %zl1_32_64, %zl2_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %sw_8_16 = sext <4 x i8> %i8 to <4 x i16>
@@ -433,55 +433,55 @@ define void @extaddv8(<8 x i8> %i8, <8 x i16> %i16, <8 x i32> %i32, <8 x i64> %i
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl2_8_16 = zext <8 x i8> %i8 to <8 x i16>
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %azl_8_16 = add <8 x i16> %zl1_8_16, %zl2_8_16
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sw_8_32 = sext <8 x i8> %i8 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_32 = add <8 x i32> %i32, %sw_8_32
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %asw_8_32 = add <8 x i32> %i32, %sw_8_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_8_32 = sext <8 x i8> %i8 to <8 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_8_32 = sext <8 x i8> %i8 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_32 = add <8 x i32> %sl1_8_32, %sl2_8_32
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %asl_8_32 = add <8 x i32> %sl1_8_32, %sl2_8_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zw_8_32 = zext <8 x i8> %i8 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_32 = add <8 x i32> %i32, %zw_8_32
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %azw_8_32 = add <8 x i32> %i32, %zw_8_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_8_32 = zext <8 x i8> %i8 to <8 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_8_32 = zext <8 x i8> %i8 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_32 = add <8 x i32> %zl1_8_32, %zl2_8_32
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %azl_8_32 = add <8 x i32> %zl1_8_32, %zl2_8_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %sw_8_64 = sext <8 x i8> %i8 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_64 = add <8 x i64> %i64, %sw_8_64
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %asw_8_64 = add <8 x i64> %i64, %sw_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_8_64 = sext <8 x i8> %i8 to <8 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_8_64 = sext <8 x i8> %i8 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_64 = add <8 x i64> %sl1_8_64, %sl2_8_64
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %asl_8_64 = add <8 x i64> %sl1_8_64, %sl2_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %zw_8_64 = zext <8 x i8> %i8 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_64 = add <8 x i64> %i64, %zw_8_64
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %azw_8_64 = add <8 x i64> %i64, %zw_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_8_64 = zext <8 x i8> %i8 to <8 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_8_64 = zext <8 x i8> %i8 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_64 = add <8 x i64> %zl1_8_64, %zl2_8_64
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %azl_8_64 = add <8 x i64> %zl1_8_64, %zl2_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sw_16_32 = sext <8 x i16> %i16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asw_16_32 = add <8 x i32> %i32, %sw_16_32
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %asw_16_32 = add <8 x i32> %i32, %sw_16_32
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl1_16_32 = sext <8 x i16> %i16 to <8 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl2_16_32 = sext <8 x i16> %i16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_32 = add <8 x i32> %sl1_16_32, %sl2_16_32
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %asl_16_32 = add <8 x i32> %sl1_16_32, %sl2_16_32
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zw_16_32 = zext <8 x i16> %i16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azw_16_32 = add <8 x i32> %i32, %zw_16_32
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %azw_16_32 = add <8 x i32> %i32, %zw_16_32
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl1_16_32 = zext <8 x i16> %i16 to <8 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl2_16_32 = zext <8 x i16> %i16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_32 = add <8 x i32> %zl1_16_32, %zl2_16_32
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %azl_16_32 = add <8 x i32> %zl1_16_32, %zl2_16_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sw_16_64 = sext <8 x i16> %i16 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asw_16_64 = add <8 x i64> %i64, %sw_16_64
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %asw_16_64 = add <8 x i64> %i64, %sw_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_16_64 = sext <8 x i16> %i16 to <8 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_16_64 = sext <8 x i16> %i16 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_64 = add <8 x i64> %sl1_16_64, %sl2_16_64
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %asl_16_64 = add <8 x i64> %sl1_16_64, %sl2_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zw_16_64 = zext <8 x i16> %i16 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azw_16_64 = add <8 x i64> %i64, %zw_16_64
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %azw_16_64 = add <8 x i64> %i64, %zw_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_16_64 = zext <8 x i16> %i16 to <8 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_16_64 = zext <8 x i16> %i16 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_64 = add <8 x i64> %zl1_16_64, %zl2_16_64
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %azl_16_64 = add <8 x i64> %zl1_16_64, %zl2_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sw_32_64 = sext <8 x i32> %i32 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asw_32_64 = add <8 x i64> %i64, %sw_32_64
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %asw_32_64 = add <8 x i64> %i64, %sw_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl1_32_64 = sext <8 x i32> %i32 to <8 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl2_32_64 = sext <8 x i32> %i32 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asl_32_64 = add <8 x i64> %sl1_32_64, %sl2_32_64
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %asl_32_64 = add <8 x i64> %sl1_32_64, %sl2_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zw_32_64 = zext <8 x i32> %i32 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azw_32_64 = add <8 x i64> %i64, %zw_32_64
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %azw_32_64 = add <8 x i64> %i64, %zw_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl1_32_64 = zext <8 x i32> %i32 to <8 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl2_32_64 = zext <8 x i32> %i32 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azl_32_64 = add <8 x i64> %zl1_32_64, %zl2_32_64
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %azl_32_64 = add <8 x i64> %zl1_32_64, %zl2_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %sw_8_16 = sext <8 x i8> %i8 to <8 x i16>
@@ -562,65 +562,65 @@ define void @extaddv8(<8 x i8> %i8, <8 x i16> %i16, <8 x i32> %i32, <8 x i64> %i
 define void @extaddv16(<16 x i8> %i8, <16 x i16> %i16, <16 x i32> %i32, <16 x i64> %i64)  {
 ; CHECK-LABEL: 'extaddv16'
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sw_8_16 = sext <16 x i8> %i8 to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_16 = add <16 x i16> %i16, %sw_8_16
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %asw_8_16 = add <16 x i16> %i16, %sw_8_16
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl1_8_16 = sext <16 x i8> %i8 to <16 x i16>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl2_8_16 = sext <16 x i8> %i8 to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_16 = add <16 x i16> %sl1_8_16, %sl2_8_16
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %asl_8_16 = add <16 x i16> %sl1_8_16, %sl2_8_16
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zw_8_16 = zext <16 x i8> %i8 to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_16 = add <16 x i16> %i16, %zw_8_16
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %azw_8_16 = add <16 x i16> %i16, %zw_8_16
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl1_8_16 = zext <16 x i8> %i8 to <16 x i16>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl2_8_16 = zext <16 x i8> %i8 to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_16 = add <16 x i16> %zl1_8_16, %zl2_8_16
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %azl_8_16 = add <16 x i16> %zl1_8_16, %zl2_8_16
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sw_8_32 = sext <16 x i8> %i8 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_32 = add <16 x i32> %i32, %sw_8_32
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %asw_8_32 = add <16 x i32> %i32, %sw_8_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_8_32 = sext <16 x i8> %i8 to <16 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_8_32 = sext <16 x i8> %i8 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_32 = add <16 x i32> %sl1_8_32, %sl2_8_32
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %asl_8_32 = add <16 x i32> %sl1_8_32, %sl2_8_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zw_8_32 = zext <16 x i8> %i8 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_32 = add <16 x i32> %i32, %zw_8_32
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %azw_8_32 = add <16 x i32> %i32, %zw_8_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_8_32 = zext <16 x i8> %i8 to <16 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_8_32 = zext <16 x i8> %i8 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_32 = add <16 x i32> %zl1_8_32, %zl2_8_32
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %azl_8_32 = add <16 x i32> %zl1_8_32, %zl2_8_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %sw_8_64 = sext <16 x i8> %i8 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_64 = add <16 x i64> %i64, %sw_8_64
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %asw_8_64 = add <16 x i64> %i64, %sw_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_8_64 = sext <16 x i8> %i8 to <16 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_8_64 = sext <16 x i8> %i8 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_64 = add <16 x i64> %sl1_8_64, %sl2_8_64
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %asl_8_64 = add <16 x i64> %sl1_8_64, %sl2_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %zw_8_64 = zext <16 x i8> %i8 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_64 = add <16 x i64> %i64, %zw_8_64
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %azw_8_64 = add <16 x i64> %i64, %zw_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_8_64 = zext <16 x i8> %i8 to <16 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_8_64 = zext <16 x i8> %i8 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_64 = add <16 x i64> %zl1_8_64, %zl2_8_64
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %azl_8_64 = add <16 x i64> %zl1_8_64, %zl2_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sw_16_32 = sext <16 x i16> %i16 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asw_16_32 = add <16 x i32> %i32, %sw_16_32
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %asw_16_32 = add <16 x i32> %i32, %sw_16_32
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl1_16_32 = sext <16 x i16> %i16 to <16 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl2_16_32 = sext <16 x i16> %i16 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_32 = add <16 x i32> %sl1_16_32, %sl2_16_32
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %asl_16_32 = add <16 x i32> %sl1_16_32, %sl2_16_32
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zw_16_32 = zext <16 x i16> %i16 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azw_16_32 = add <16 x i32> %i32, %zw_16_32
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %azw_16_32 = add <16 x i32> %i32, %zw_16_32
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl1_16_32 = zext <16 x i16> %i16 to <16 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl2_16_32 = zext <16 x i16> %i16 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_32 = add <16 x i32> %zl1_16_32, %zl2_16_32
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %azl_16_32 = add <16 x i32> %zl1_16_32, %zl2_16_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %sw_16_64 = sext <16 x i16> %i16 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %asw_16_64 = add <16 x i64> %i64, %sw_16_64
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %asw_16_64 = add <16 x i64> %i64, %sw_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_16_64 = sext <16 x i16> %i16 to <16 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_16_64 = sext <16 x i16> %i16 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_64 = add <16 x i64> %sl1_16_64, %sl2_16_64
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %asl_16_64 = add <16 x i64> %sl1_16_64, %sl2_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %zw_16_64 = zext <16 x i16> %i16 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %azw_16_64 = add <16 x i64> %i64, %zw_16_64
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %azw_16_64 = add <16 x i64> %i64, %zw_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_16_64 = zext <16 x i16> %i16 to <16 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_16_64 = zext <16 x i16> %i16 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_64 = add <16 x i64> %zl1_16_64, %zl2_16_64
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %azl_16_64 = add <16 x i64> %zl1_16_64, %zl2_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sw_32_64 = sext <16 x i32> %i32 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %asw_32_64 = add <16 x i64> %i64, %sw_32_64
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %asw_32_64 = add <16 x i64> %i64, %sw_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl1_32_64 = sext <16 x i32> %i32 to <16 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl2_32_64 = sext <16 x i32> %i32 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %asl_32_64 = add <16 x i64> %sl1_32_64, %sl2_32_64
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %asl_32_64 = add <16 x i64> %sl1_32_64, %sl2_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zw_32_64 = zext <16 x i32> %i32 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %azw_32_64 = add <16 x i64> %i64, %zw_32_64
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %azw_32_64 = add <16 x i64> %i64, %zw_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl1_32_64 = zext <16 x i32> %i32 to <16 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl2_32_64 = zext <16 x i32> %i32 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %azl_32_64 = add <16 x i64> %zl1_32_64, %zl2_32_64
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %azl_32_64 = add <16 x i64> %zl1_32_64, %zl2_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %sw_8_16 = sext <16 x i8> %i8 to <16 x i16>
@@ -999,15 +999,15 @@ define void @extsubv4(<4 x i8> %i8, <4 x i16> %i16, <4 x i32> %i32, <4 x i64> %i
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %zl2_8_32 = zext <4 x i8> %i8 to <4 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %azl_8_32 = sub <4 x i32> %zl1_8_32, %zl2_8_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sw_8_64 = sext <4 x i8> %i8 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_64 = sub <4 x i64> %i64, %sw_8_64
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %asw_8_64 = sub <4 x i64> %i64, %sw_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_8_64 = sext <4 x i8> %i8 to <4 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_8_64 = sext <4 x i8> %i8 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_64 = sub <4 x i64> %sl1_8_64, %sl2_8_64
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %asl_8_64 = sub <4 x i64> %sl1_8_64, %sl2_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zw_8_64 = zext <4 x i8> %i8 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_64 = sub <4 x i64> %i64, %zw_8_64
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %azw_8_64 = sub <4 x i64> %i64, %zw_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_8_64 = zext <4 x i8> %i8 to <4 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_8_64 = zext <4 x i8> %i8 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_64 = sub <4 x i64> %zl1_8_64, %zl2_8_64
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %azl_8_64 = sub <4 x i64> %zl1_8_64, %zl2_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sw_16_32 = sext <4 x i16> %i16 to <4 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %asw_16_32 = sub <4 x i32> %i32, %sw_16_32
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl1_16_32 = sext <4 x i16> %i16 to <4 x i32>
@@ -1019,25 +1019,25 @@ define void @extsubv4(<4 x i8> %i8, <4 x i16> %i16, <4 x i32> %i32, <4 x i64> %i
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl2_16_32 = zext <4 x i16> %i16 to <4 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %azl_16_32 = sub <4 x i32> %zl1_16_32, %zl2_16_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sw_16_64 = sext <4 x i16> %i16 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asw_16_64 = sub <4 x i64> %i64, %sw_16_64
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %asw_16_64 = sub <4 x i64> %i64, %sw_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_16_64 = sext <4 x i16> %i16 to <4 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_16_64 = sext <4 x i16> %i16 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_64 = sub <4 x i64> %sl1_16_64, %sl2_16_64
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %asl_16_64 = sub <4 x i64> %sl1_16_64, %sl2_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zw_16_64 = zext <4 x i16> %i16 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azw_16_64 = sub <4 x i64> %i64, %zw_16_64
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %azw_16_64 = sub <4 x i64> %i64, %zw_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_16_64 = zext <4 x i16> %i16 to <4 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_16_64 = zext <4 x i16> %i16 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_64 = sub <4 x i64> %zl1_16_64, %zl2_16_64
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %azl_16_64 = sub <4 x i64> %zl1_16_64, %zl2_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sw_32_64 = sext <4 x i32> %i32 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asw_32_64 = sub <4 x i64> %i64, %sw_32_64
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %asw_32_64 = sub <4 x i64> %i64, %sw_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl1_32_64 = sext <4 x i32> %i32 to <4 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl2_32_64 = sext <4 x i32> %i32 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asl_32_64 = sub <4 x i64> %sl1_32_64, %sl2_32_64
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %asl_32_64 = sub <4 x i64> %sl1_32_64, %sl2_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zw_32_64 = zext <4 x i32> %i32 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azw_32_64 = sub <4 x i64> %i64, %zw_32_64
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %azw_32_64 = sub <4 x i64> %i64, %zw_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl1_32_64 = zext <4 x i32> %i32 to <4 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl2_32_64 = zext <4 x i32> %i32 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azl_32_64 = sub <4 x i64> %zl1_32_64, %zl2_32_64
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %azl_32_64 = sub <4 x i64> %zl1_32_64, %zl2_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %sw_8_16 = sext <4 x i8> %i8 to <4 x i16>
@@ -1128,55 +1128,55 @@ define void @extsubv8(<8 x i8> %i8, <8 x i16> %i16, <8 x i32> %i32, <8 x i64> %i
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl2_8_16 = zext <8 x i8> %i8 to <8 x i16>
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %azl_8_16 = sub <8 x i16> %zl1_8_16, %zl2_8_16
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sw_8_32 = sext <8 x i8> %i8 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_32 = sub <8 x i32> %i32, %sw_8_32
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %asw_8_32 = sub <8 x i32> %i32, %sw_8_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_8_32 = sext <8 x i8> %i8 to <8 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_8_32 = sext <8 x i8> %i8 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_32 = sub <8 x i32> %sl1_8_32, %sl2_8_32
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %asl_8_32 = sub <8 x i32> %sl1_8_32, %sl2_8_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zw_8_32 = zext <8 x i8> %i8 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_32 = sub <8 x i32> %i32, %zw_8_32
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %azw_8_32 = sub <8 x i32> %i32, %zw_8_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_8_32 = zext <8 x i8> %i8 to <8 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_8_32 = zext <8 x i8> %i8 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_32 = sub <8 x i32> %zl1_8_32, %zl2_8_32
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %azl_8_32 = sub <8 x i32> %zl1_8_32, %zl2_8_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %sw_8_64 = sext <8 x i8> %i8 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_64 = sub <8 x i64> %i64, %sw_8_64
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %asw_8_64 = sub <8 x i64> %i64, %sw_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_8_64 = sext <8 x i8> %i8 to <8 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_8_64 = sext <8 x i8> %i8 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_64 = sub <8 x i64> %sl1_8_64, %sl2_8_64
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %asl_8_64 = sub <8 x i64> %sl1_8_64, %sl2_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %zw_8_64 = zext <8 x i8> %i8 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_64 = sub <8 x i64> %i64, %zw_8_64
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %azw_8_64 = sub <8 x i64> %i64, %zw_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_8_64 = zext <8 x i8> %i8 to <8 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_8_64 = zext <8 x i8> %i8 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_64 = sub <8 x i64> %zl1_8_64, %zl2_8_64
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %azl_8_64 = sub <8 x i64> %zl1_8_64, %zl2_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sw_16_32 = sext <8 x i16> %i16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asw_16_32 = sub <8 x i32> %i32, %sw_16_32
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %asw_16_32 = sub <8 x i32> %i32, %sw_16_32
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl1_16_32 = sext <8 x i16> %i16 to <8 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl2_16_32 = sext <8 x i16> %i16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_32 = sub <8 x i32> %sl1_16_32, %sl2_16_32
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %asl_16_32 = sub <8 x i32> %sl1_16_32, %sl2_16_32
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zw_16_32 = zext <8 x i16> %i16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azw_16_32 = sub <8 x i32> %i32, %zw_16_32
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %azw_16_32 = sub <8 x i32> %i32, %zw_16_32
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl1_16_32 = zext <8 x i16> %i16 to <8 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl2_16_32 = zext <8 x i16> %i16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_32 = sub <8 x i32> %zl1_16_32, %zl2_16_32
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %azl_16_32 = sub <8 x i32> %zl1_16_32, %zl2_16_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sw_16_64 = sext <8 x i16> %i16 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asw_16_64 = sub <8 x i64> %i64, %sw_16_64
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %asw_16_64 = sub <8 x i64> %i64, %sw_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_16_64 = sext <8 x i16> %i16 to <8 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_16_64 = sext <8 x i16> %i16 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_64 = sub <8 x i64> %sl1_16_64, %sl2_16_64
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %asl_16_64 = sub <8 x i64> %sl1_16_64, %sl2_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zw_16_64 = zext <8 x i16> %i16 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azw_16_64 = sub <8 x i64> %i64, %zw_16_64
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %azw_16_64 = sub <8 x i64> %i64, %zw_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_16_64 = zext <8 x i16> %i16 to <8 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_16_64 = zext <8 x i16> %i16 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_64 = sub <8 x i64> %zl1_16_64, %zl2_16_64
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %azl_16_64 = sub <8 x i64> %zl1_16_64, %zl2_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sw_32_64 = sext <8 x i32> %i32 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asw_32_64 = sub <8 x i64> %i64, %sw_32_64
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %asw_32_64 = sub <8 x i64> %i64, %sw_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl1_32_64 = sext <8 x i32> %i32 to <8 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl2_32_64 = sext <8 x i32> %i32 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asl_32_64 = sub <8 x i64> %sl1_32_64, %sl2_32_64
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %asl_32_64 = sub <8 x i64> %sl1_32_64, %sl2_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zw_32_64 = zext <8 x i32> %i32 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azw_32_64 = sub <8 x i64> %i64, %zw_32_64
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %azw_32_64 = sub <8 x i64> %i64, %zw_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl1_32_64 = zext <8 x i32> %i32 to <8 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl2_32_64 = zext <8 x i32> %i32 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azl_32_64 = sub <8 x i64> %zl1_32_64, %zl2_32_64
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %azl_32_64 = sub <8 x i64> %zl1_32_64, %zl2_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %sw_8_16 = sext <8 x i8> %i8 to <8 x i16>
@@ -1257,65 +1257,65 @@ define void @extsubv8(<8 x i8> %i8, <8 x i16> %i16, <8 x i32> %i32, <8 x i64> %i
 define void @extsubv16(<16 x i8> %i8, <16 x i16> %i16, <16 x i32> %i32, <16 x i64> %i64)  {
 ; CHECK-LABEL: 'extsubv16'
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sw_8_16 = sext <16 x i8> %i8 to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_16 = sub <16 x i16> %i16, %sw_8_16
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %asw_8_16 = sub <16 x i16> %i16, %sw_8_16
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl1_8_16 = sext <16 x i8> %i8 to <16 x i16>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl2_8_16 = sext <16 x i8> %i8 to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_16 = sub <16 x i16> %sl1_8_16, %sl2_8_16
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %asl_8_16 = sub <16 x i16> %sl1_8_16, %sl2_8_16
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zw_8_16 = zext <16 x i8> %i8 to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_16 = sub <16 x i16> %i16, %zw_8_16
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %azw_8_16 = sub <16 x i16> %i16, %zw_8_16
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl1_8_16 = zext <16 x i8> %i8 to <16 x i16>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl2_8_16 = zext <16 x i8> %i8 to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_16 = sub <16 x i16> %zl1_8_16, %zl2_8_16
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %azl_8_16 = sub <16 x i16> %zl1_8_16, %zl2_8_16
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sw_8_32 = sext <16 x i8> %i8 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_32 = sub <16 x i32> %i32, %sw_8_32
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %asw_8_32 = sub <16 x i32> %i32, %sw_8_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_8_32 = sext <16 x i8> %i8 to <16 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_8_32 = sext <16 x i8> %i8 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_32 = sub <16 x i32> %sl1_8_32, %sl2_8_32
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %asl_8_32 = sub <16 x i32> %sl1_8_32, %sl2_8_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zw_8_32 = zext <16 x i8> %i8 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_32 = sub <16 x i32> %i32, %zw_8_32
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %azw_8_32 = sub <16 x i32> %i32, %zw_8_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_8_32 = zext <16 x i8> %i8 to <16 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_8_32 = zext <16 x i8> %i8 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_32 = sub <16 x i32> %zl1_8_32, %zl2_8_32
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %azl_8_32 = sub <16 x i32> %zl1_8_32, %zl2_8_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %sw_8_64 = sext <16 x i8> %i8 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_64 = sub <16 x i64> %i64, %sw_8_64
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %asw_8_64 = sub <16 x i64> %i64, %sw_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_8_64 = sext <16 x i8> %i8 to <16 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_8_64 = sext <16 x i8> %i8 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_64 = sub <16 x i64> %sl1_8_64, %sl2_8_64
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %asl_8_64 = sub <16 x i64> %sl1_8_64, %sl2_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %zw_8_64 = zext <16 x i8> %i8 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_64 = sub <16 x i64> %i64, %zw_8_64
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %azw_8_64 = sub <16 x i64> %i64, %zw_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_8_64 = zext <16 x i8> %i8 to <16 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_8_64 = zext <16 x i8> %i8 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_64 = sub <16 x i64> %zl1_8_64, %zl2_8_64
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %azl_8_64 = sub <16 x i64> %zl1_8_64, %zl2_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sw_16_32 = sext <16 x i16> %i16 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asw_16_32 = sub <16 x i32> %i32, %sw_16_32
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %asw_16_32 = sub <16 x i32> %i32, %sw_16_32
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl1_16_32 = sext <16 x i16> %i16 to <16 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl2_16_32 = sext <16 x i16> %i16 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_32 = sub <16 x i32> %sl1_16_32, %sl2_16_32
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %asl_16_32 = sub <16 x i32> %sl1_16_32, %sl2_16_32
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zw_16_32 = zext <16 x i16> %i16 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azw_16_32 = sub <16 x i32> %i32, %zw_16_32
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %azw_16_32 = sub <16 x i32> %i32, %zw_16_32
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl1_16_32 = zext <16 x i16> %i16 to <16 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl2_16_32 = zext <16 x i16> %i16 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_32 = sub <16 x i32> %zl1_16_32, %zl2_16_32
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %azl_16_32 = sub <16 x i32> %zl1_16_32, %zl2_16_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %sw_16_64 = sext <16 x i16> %i16 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %asw_16_64 = sub <16 x i64> %i64, %sw_16_64
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %asw_16_64 = sub <16 x i64> %i64, %sw_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_16_64 = sext <16 x i16> %i16 to <16 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_16_64 = sext <16 x i16> %i16 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_64 = sub <16 x i64> %sl1_16_64, %sl2_16_64
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %asl_16_64 = sub <16 x i64> %sl1_16_64, %sl2_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %zw_16_64 = zext <16 x i16> %i16 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %azw_16_64 = sub <16 x i64> %i64, %zw_16_64
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %azw_16_64 = sub <16 x i64> %i64, %zw_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_16_64 = zext <16 x i16> %i16 to <16 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_16_64 = zext <16 x i16> %i16 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_64 = sub <16 x i64> %zl1_16_64, %zl2_16_64
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %azl_16_64 = sub <16 x i64> %zl1_16_64, %zl2_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sw_32_64 = sext <16 x i32> %i32 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %asw_32_64 = sub <16 x i64> %i64, %sw_32_64
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %asw_32_64 = sub <16 x i64> %i64, %sw_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl1_32_64 = sext <16 x i32> %i32 to <16 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl2_32_64 = sext <16 x i32> %i32 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %asl_32_64 = sub <16 x i64> %sl1_32_64, %sl2_32_64
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %asl_32_64 = sub <16 x i64> %sl1_32_64, %sl2_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zw_32_64 = zext <16 x i32> %i32 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %azw_32_64 = sub <16 x i64> %i64, %zw_32_64
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %azw_32_64 = sub <16 x i64> %i64, %zw_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl1_32_64 = zext <16 x i32> %i32 to <16 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl2_32_64 = zext <16 x i32> %i32 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %azl_32_64 = sub <16 x i64> %zl1_32_64, %zl2_32_64
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %azl_32_64 = sub <16 x i64> %zl1_32_64, %zl2_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %sw_8_16 = sext <16 x i8> %i8 to <16 x i16>
@@ -1555,15 +1555,15 @@ define void @extmulv2(<2 x i8> %i8, <2 x i16> %i16, <2 x i32> %i32, <2 x i64> %i
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %zl2_8_32 = zext <2 x i8> %i8 to <2 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %azl_8_32 = mul <2 x i32> %zl1_8_32, %zl2_8_32
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %sw_8_64 = sext <2 x i8> %i8 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_64 = mul <2 x i64> %i64, %sw_8_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:8 Lat:14 SizeLat:14 for: %asw_8_64 = mul <2 x i64> %i64, %sw_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %sl1_8_64 = sext <2 x i8> %i8 to <2 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %sl2_8_64 = sext <2 x i8> %i8 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_64 = mul <2 x i64> %sl1_8_64, %sl2_8_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:8 Lat:14 SizeLat:14 for: %asl_8_64 = mul <2 x i64> %sl1_8_64, %sl2_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %zw_8_64 = zext <2 x i8> %i8 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_64 = mul <2 x i64> %i64, %zw_8_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:8 Lat:14 SizeLat:14 for: %azw_8_64 = mul <2 x i64> %i64, %zw_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %zl1_8_64 = zext <2 x i8> %i8 to <2 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %zl2_8_64 = zext <2 x i8> %i8 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_64 = mul <2 x i64> %zl1_8_64, %zl2_8_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:8 Lat:14 SizeLat:14 for: %azl_8_64 = mul <2 x i64> %zl1_8_64, %zl2_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %sw_16_32 = sext <2 x i16> %i16 to <2 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %asw_16_32 = mul <2 x i32> %i32, %sw_16_32
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %sl1_16_32 = sext <2 x i16> %i16 to <2 x i32>
@@ -1575,22 +1575,22 @@ define void @extmulv2(<2 x i8> %i8, <2 x i16> %i16, <2 x i32> %i32, <2 x i64> %i
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %zl2_16_32 = zext <2 x i16> %i16 to <2 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %azl_16_32 = mul <2 x i32> %zl1_16_32, %zl2_16_32
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %sw_16_64 = sext <2 x i16> %i16 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %asw_16_64 = mul <2 x i64> %i64, %sw_16_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:8 Lat:14 SizeLat:14 for: %asw_16_64 = mul <2 x i64> %i64, %sw_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %sl1_16_64 = sext <2 x i16> %i16 to <2 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %sl2_16_64 = sext <2 x i16> %i16 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_64 = mul <2 x i64> %sl1_16_64, %sl2_16_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:8 Lat:14 SizeLat:14 for: %asl_16_64 = mul <2 x i64> %sl1_16_64, %sl2_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %zw_16_64 = zext <2 x i16> %i16 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %azw_16_64 = mul <2 x i64> %i64, %zw_16_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:8 Lat:14 SizeLat:14 for: %azw_16_64 = mul <2 x i64> %i64, %zw_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %zl1_16_64 = zext <2 x i16> %i16 to <2 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %zl2_16_64 = zext <2 x i16> %i16 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_64 = mul <2 x i64> %zl1_16_64, %zl2_16_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:8 Lat:14 SizeLat:14 for: %azl_16_64 = mul <2 x i64> %zl1_16_64, %zl2_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %sw_32_64 = sext <2 x i32> %i32 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %asw_32_64 = mul <2 x i64> %i64, %sw_32_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:8 Lat:14 SizeLat:14 for: %asw_32_64 = mul <2 x i64> %i64, %sw_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl1_32_64 = sext <2 x i32> %i32 to <2 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl2_32_64 = sext <2 x i32> %i32 to <2 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %asl_32_64 = mul <2 x i64> %sl1_32_64, %sl2_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %zw_32_64 = zext <2 x i32> %i32 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %azw_32_64 = mul <2 x i64> %i64, %zw_32_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:8 Lat:14 SizeLat:14 for: %azw_32_64 = mul <2 x i64> %i64, %zw_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl1_32_64 = zext <2 x i32> %i32 to <2 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl2_32_64 = zext <2 x i32> %i32 to <2 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %azl_32_64 = mul <2 x i64> %zl1_32_64, %zl2_32_64
@@ -1694,15 +1694,15 @@ define void @extmulv4(<4 x i8> %i8, <4 x i16> %i16, <4 x i32> %i32, <4 x i64> %i
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %zl2_8_32 = zext <4 x i8> %i8 to <4 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %azl_8_32 = mul <4 x i32> %zl1_8_32, %zl2_8_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sw_8_64 = sext <4 x i8> %i8 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_64 = mul <4 x i64> %i64, %sw_8_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:16 Lat:28 SizeLat:28 for: %asw_8_64 = mul <4 x i64> %i64, %sw_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_8_64 = sext <4 x i8> %i8 to <4 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_8_64 = sext <4 x i8> %i8 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_64 = mul <4 x i64> %sl1_8_64, %sl2_8_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:16 Lat:28 SizeLat:28 for: %asl_8_64 = mul <4 x i64> %sl1_8_64, %sl2_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zw_8_64 = zext <4 x i8> %i8 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_64 = mul <4 x i64> %i64, %zw_8_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:16 Lat:28 SizeLat:28 for: %azw_8_64 = mul <4 x i64> %i64, %zw_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_8_64 = zext <4 x i8> %i8 to <4 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_8_64 = zext <4 x i8> %i8 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_64 = mul <4 x i64> %zl1_8_64, %zl2_8_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:16 Lat:28 SizeLat:28 for: %azl_8_64 = mul <4 x i64> %zl1_8_64, %zl2_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %sw_16_32 = sext <4 x i16> %i16 to <4 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %asw_16_32 = mul <4 x i32> %i32, %sw_16_32
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl1_16_32 = sext <4 x i16> %i16 to <4 x i32>
@@ -1714,25 +1714,25 @@ define void @extmulv4(<4 x i8> %i8, <4 x i16> %i16, <4 x i32> %i32, <4 x i64> %i
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl2_16_32 = zext <4 x i16> %i16 to <4 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %azl_16_32 = mul <4 x i32> %zl1_16_32, %zl2_16_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sw_16_64 = sext <4 x i16> %i16 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:1 Lat:1 SizeLat:1 for: %asw_16_64 = mul <4 x i64> %i64, %sw_16_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:16 Lat:28 SizeLat:28 for: %asw_16_64 = mul <4 x i64> %i64, %sw_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_16_64 = sext <4 x i16> %i16 to <4 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_16_64 = sext <4 x i16> %i16 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_64 = mul <4 x i64> %sl1_16_64, %sl2_16_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:16 Lat:28 SizeLat:28 for: %asl_16_64 = mul <4 x i64> %sl1_16_64, %sl2_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zw_16_64 = zext <4 x i16> %i16 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:1 Lat:1 SizeLat:1 for: %azw_16_64 = mul <4 x i64> %i64, %zw_16_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:16 Lat:28 SizeLat:28 for: %azw_16_64 = mul <4 x i64> %i64, %zw_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_16_64 = zext <4 x i16> %i16 to <4 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_16_64 = zext <4 x i16> %i16 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_64 = mul <4 x i64> %zl1_16_64, %zl2_16_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:16 Lat:28 SizeLat:28 for: %azl_16_64 = mul <4 x i64> %zl1_16_64, %zl2_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %sw_32_64 = sext <4 x i32> %i32 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:1 Lat:1 SizeLat:1 for: %asw_32_64 = mul <4 x i64> %i64, %sw_32_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:16 Lat:28 SizeLat:28 for: %asw_32_64 = mul <4 x i64> %i64, %sw_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl1_32_64 = sext <4 x i32> %i32 to <4 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl2_32_64 = sext <4 x i32> %i32 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asl_32_64 = mul <4 x i64> %sl1_32_64, %sl2_32_64
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %asl_32_64 = mul <4 x i64> %sl1_32_64, %sl2_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %zw_32_64 = zext <4 x i32> %i32 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:1 Lat:1 SizeLat:1 for: %azw_32_64 = mul <4 x i64> %i64, %zw_32_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:16 Lat:28 SizeLat:28 for: %azw_32_64 = mul <4 x i64> %i64, %zw_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl1_32_64 = zext <4 x i32> %i32 to <4 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl2_32_64 = zext <4 x i32> %i32 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azl_32_64 = mul <4 x i64> %zl1_32_64, %zl2_32_64
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %azl_32_64 = mul <4 x i64> %zl1_32_64, %zl2_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %sw_8_16 = sext <4 x i8> %i8 to <4 x i16>
@@ -1823,55 +1823,55 @@ define void @extmulv8(<8 x i8> %i8, <8 x i16> %i16, <8 x i32> %i32, <8 x i64> %i
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl2_8_16 = zext <8 x i8> %i8 to <8 x i16>
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %azl_8_16 = mul <8 x i16> %zl1_8_16, %zl2_8_16
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sw_8_32 = sext <8 x i8> %i8 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_32 = mul <8 x i32> %i32, %sw_8_32
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %asw_8_32 = mul <8 x i32> %i32, %sw_8_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_8_32 = sext <8 x i8> %i8 to <8 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_8_32 = sext <8 x i8> %i8 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_32 = mul <8 x i32> %sl1_8_32, %sl2_8_32
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %asl_8_32 = mul <8 x i32> %sl1_8_32, %sl2_8_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zw_8_32 = zext <8 x i8> %i8 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_32 = mul <8 x i32> %i32, %zw_8_32
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %azw_8_32 = mul <8 x i32> %i32, %zw_8_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_8_32 = zext <8 x i8> %i8 to <8 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_8_32 = zext <8 x i8> %i8 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_32 = mul <8 x i32> %zl1_8_32, %zl2_8_32
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %azl_8_32 = mul <8 x i32> %zl1_8_32, %zl2_8_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %sw_8_64 = sext <8 x i8> %i8 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:56 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_64 = mul <8 x i64> %i64, %sw_8_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:56 CodeSize:32 Lat:56 SizeLat:56 for: %asw_8_64 = mul <8 x i64> %i64, %sw_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_8_64 = sext <8 x i8> %i8 to <8 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_8_64 = sext <8 x i8> %i8 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:56 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_64 = mul <8 x i64> %sl1_8_64, %sl2_8_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:56 CodeSize:32 Lat:56 SizeLat:56 for: %asl_8_64 = mul <8 x i64> %sl1_8_64, %sl2_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %zw_8_64 = zext <8 x i8> %i8 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:56 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_64 = mul <8 x i64> %i64, %zw_8_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:56 CodeSize:32 Lat:56 SizeLat:56 for: %azw_8_64 = mul <8 x i64> %i64, %zw_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_8_64 = zext <8 x i8> %i8 to <8 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_8_64 = zext <8 x i8> %i8 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:56 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_64 = mul <8 x i64> %zl1_8_64, %zl2_8_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:56 CodeSize:32 Lat:56 SizeLat:56 for: %azl_8_64 = mul <8 x i64> %zl1_8_64, %zl2_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %sw_16_32 = sext <8 x i16> %i16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asw_16_32 = mul <8 x i32> %i32, %sw_16_32
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %asw_16_32 = mul <8 x i32> %i32, %sw_16_32
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl1_16_32 = sext <8 x i16> %i16 to <8 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl2_16_32 = sext <8 x i16> %i16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_32 = mul <8 x i32> %sl1_16_32, %sl2_16_32
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %asl_16_32 = mul <8 x i32> %sl1_16_32, %sl2_16_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %zw_16_32 = zext <8 x i16> %i16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azw_16_32 = mul <8 x i32> %i32, %zw_16_32
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %azw_16_32 = mul <8 x i32> %i32, %zw_16_32
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl1_16_32 = zext <8 x i16> %i16 to <8 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl2_16_32 = zext <8 x i16> %i16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_32 = mul <8 x i32> %zl1_16_32, %zl2_16_32
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %azl_16_32 = mul <8 x i32> %zl1_16_32, %zl2_16_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sw_16_64 = sext <8 x i16> %i16 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:56 CodeSize:1 Lat:1 SizeLat:1 for: %asw_16_64 = mul <8 x i64> %i64, %sw_16_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:56 CodeSize:32 Lat:56 SizeLat:56 for: %asw_16_64 = mul <8 x i64> %i64, %sw_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_16_64 = sext <8 x i16> %i16 to <8 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_16_64 = sext <8 x i16> %i16 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:56 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_64 = mul <8 x i64> %sl1_16_64, %sl2_16_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:56 CodeSize:32 Lat:56 SizeLat:56 for: %asl_16_64 = mul <8 x i64> %sl1_16_64, %sl2_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zw_16_64 = zext <8 x i16> %i16 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:56 CodeSize:1 Lat:1 SizeLat:1 for: %azw_16_64 = mul <8 x i64> %i64, %zw_16_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:56 CodeSize:32 Lat:56 SizeLat:56 for: %azw_16_64 = mul <8 x i64> %i64, %zw_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_16_64 = zext <8 x i16> %i16 to <8 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_16_64 = zext <8 x i16> %i16 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:56 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_64 = mul <8 x i64> %zl1_16_64, %zl2_16_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:56 CodeSize:32 Lat:56 SizeLat:56 for: %azl_16_64 = mul <8 x i64> %zl1_16_64, %zl2_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %sw_32_64 = sext <8 x i32> %i32 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:56 CodeSize:1 Lat:1 SizeLat:1 for: %asw_32_64 = mul <8 x i64> %i64, %sw_32_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:56 CodeSize:32 Lat:56 SizeLat:56 for: %asw_32_64 = mul <8 x i64> %i64, %sw_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl1_32_64 = sext <8 x i32> %i32 to <8 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl2_32_64 = sext <8 x i32> %i32 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asl_32_64 = mul <8 x i64> %sl1_32_64, %sl2_32_64
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %asl_32_64 = mul <8 x i64> %sl1_32_64, %sl2_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %zw_32_64 = zext <8 x i32> %i32 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:56 CodeSize:1 Lat:1 SizeLat:1 for: %azw_32_64 = mul <8 x i64> %i64, %zw_32_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:56 CodeSize:32 Lat:56 SizeLat:56 for: %azw_32_64 = mul <8 x i64> %i64, %zw_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl1_32_64 = zext <8 x i32> %i32 to <8 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl2_32_64 = zext <8 x i32> %i32 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azl_32_64 = mul <8 x i64> %zl1_32_64, %zl2_32_64
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %azl_32_64 = mul <8 x i64> %zl1_32_64, %zl2_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %sw_8_16 = sext <8 x i8> %i8 to <8 x i16>
@@ -1952,65 +1952,65 @@ define void @extmulv8(<8 x i8> %i8, <8 x i16> %i16, <8 x i32> %i32, <8 x i64> %i
 define void @extmulv16(<16 x i8> %i8, <16 x i16> %i16, <16 x i32> %i32, <16 x i64> %i64)  {
 ; CHECK-LABEL: 'extmulv16'
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %sw_8_16 = sext <16 x i8> %i8 to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_16 = mul <16 x i16> %i16, %sw_8_16
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %asw_8_16 = mul <16 x i16> %i16, %sw_8_16
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl1_8_16 = sext <16 x i8> %i8 to <16 x i16>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl2_8_16 = sext <16 x i8> %i8 to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_16 = mul <16 x i16> %sl1_8_16, %sl2_8_16
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %asl_8_16 = mul <16 x i16> %sl1_8_16, %sl2_8_16
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %zw_8_16 = zext <16 x i8> %i8 to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_16 = mul <16 x i16> %i16, %zw_8_16
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %azw_8_16 = mul <16 x i16> %i16, %zw_8_16
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl1_8_16 = zext <16 x i8> %i8 to <16 x i16>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl2_8_16 = zext <16 x i8> %i8 to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_16 = mul <16 x i16> %zl1_8_16, %zl2_8_16
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %azl_8_16 = mul <16 x i16> %zl1_8_16, %zl2_8_16
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sw_8_32 = sext <16 x i8> %i8 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_32 = mul <16 x i32> %i32, %sw_8_32
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %asw_8_32 = mul <16 x i32> %i32, %sw_8_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_8_32 = sext <16 x i8> %i8 to <16 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_8_32 = sext <16 x i8> %i8 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_32 = mul <16 x i32> %sl1_8_32, %sl2_8_32
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %asl_8_32 = mul <16 x i32> %sl1_8_32, %sl2_8_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zw_8_32 = zext <16 x i8> %i8 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_32 = mul <16 x i32> %i32, %zw_8_32
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %azw_8_32 = mul <16 x i32> %i32, %zw_8_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_8_32 = zext <16 x i8> %i8 to <16 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_8_32 = zext <16 x i8> %i8 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_32 = mul <16 x i32> %zl1_8_32, %zl2_8_32
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %azl_8_32 = mul <16 x i32> %zl1_8_32, %zl2_8_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %sw_8_64 = sext <16 x i8> %i8 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:112 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_64 = mul <16 x i64> %i64, %sw_8_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:112 CodeSize:64 Lat:112 SizeLat:112 for: %asw_8_64 = mul <16 x i64> %i64, %sw_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_8_64 = sext <16 x i8> %i8 to <16 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_8_64 = sext <16 x i8> %i8 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:112 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_64 = mul <16 x i64> %sl1_8_64, %sl2_8_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:112 CodeSize:64 Lat:112 SizeLat:112 for: %asl_8_64 = mul <16 x i64> %sl1_8_64, %sl2_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %zw_8_64 = zext <16 x i8> %i8 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:112 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_64 = mul <16 x i64> %i64, %zw_8_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:112 CodeSize:64 Lat:112 SizeLat:112 for: %azw_8_64 = mul <16 x i64> %i64, %zw_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_8_64 = zext <16 x i8> %i8 to <16 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_8_64 = zext <16 x i8> %i8 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:112 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_64 = mul <16 x i64> %zl1_8_64, %zl2_8_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:112 CodeSize:64 Lat:112 SizeLat:112 for: %azl_8_64 = mul <16 x i64> %zl1_8_64, %zl2_8_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %sw_16_32 = sext <16 x i16> %i16 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asw_16_32 = mul <16 x i32> %i32, %sw_16_32
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %asw_16_32 = mul <16 x i32> %i32, %sw_16_32
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl1_16_32 = sext <16 x i16> %i16 to <16 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl2_16_32 = sext <16 x i16> %i16 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_32 = mul <16 x i32> %sl1_16_32, %sl2_16_32
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %asl_16_32 = mul <16 x i32> %sl1_16_32, %sl2_16_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %zw_16_32 = zext <16 x i16> %i16 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azw_16_32 = mul <16 x i32> %i32, %zw_16_32
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %azw_16_32 = mul <16 x i32> %i32, %zw_16_32
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl1_16_32 = zext <16 x i16> %i16 to <16 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl2_16_32 = zext <16 x i16> %i16 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_32 = mul <16 x i32> %zl1_16_32, %zl2_16_32
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %azl_16_32 = mul <16 x i32> %zl1_16_32, %zl2_16_32
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %sw_16_64 = sext <16 x i16> %i16 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:112 CodeSize:1 Lat:1 SizeLat:1 for: %asw_16_64 = mul <16 x i64> %i64, %sw_16_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:112 CodeSize:64 Lat:112 SizeLat:112 for: %asw_16_64 = mul <16 x i64> %i64, %sw_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_16_64 = sext <16 x i16> %i16 to <16 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_16_64 = sext <16 x i16> %i16 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:112 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_64 = mul <16 x i64> %sl1_16_64, %sl2_16_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:112 CodeSize:64 Lat:112 SizeLat:112 for: %asl_16_64 = mul <16 x i64> %sl1_16_64, %sl2_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %zw_16_64 = zext <16 x i16> %i16 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:112 CodeSize:1 Lat:1 SizeLat:1 for: %azw_16_64 = mul <16 x i64> %i64, %zw_16_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:112 CodeSize:64 Lat:112 SizeLat:112 for: %azw_16_64 = mul <16 x i64> %i64, %zw_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_16_64 = zext <16 x i16> %i16 to <16 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_16_64 = zext <16 x i16> %i16 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:112 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_64 = mul <16 x i64> %zl1_16_64, %zl2_16_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:112 CodeSize:64 Lat:112 SizeLat:112 for: %azl_16_64 = mul <16 x i64> %zl1_16_64, %zl2_16_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %sw_32_64 = sext <16 x i32> %i32 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:112 CodeSize:1 Lat:1 SizeLat:1 for: %asw_32_64 = mul <16 x i64> %i64, %sw_32_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:112 CodeSize:64 Lat:112 SizeLat:112 for: %asw_32_64 = mul <16 x i64> %i64, %sw_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl1_32_64 = sext <16 x i32> %i32 to <16 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %sl2_32_64 = sext <16 x i32> %i32 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %asl_32_64 = mul <16 x i64> %sl1_32_64, %sl2_32_64
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %asl_32_64 = mul <16 x i64> %sl1_32_64, %sl2_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %zw_32_64 = zext <16 x i32> %i32 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:112 CodeSize:1 Lat:1 SizeLat:1 for: %azw_32_64 = mul <16 x i64> %i64, %zw_32_64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:112 CodeSize:64 Lat:112 SizeLat:112 for: %azw_32_64 = mul <16 x i64> %i64, %zw_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl1_32_64 = zext <16 x i32> %i32 to <16 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %zl2_32_64 = zext <16 x i32> %i32 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %azl_32_64 = mul <16 x i64> %zl1_32_64, %zl2_32_64
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %azl_32_64 = mul <16 x i64> %zl1_32_64, %zl2_32_64
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %sw_8_16 = sext <16 x i8> %i8 to <16 x i16>
diff --git a/llvm/test/Analysis/CostModel/AArch64/arith.ll b/llvm/test/Analysis/CostModel/AArch64/arith.ll
index d3c1e07396921..425ce804ffb3c 100644
--- a/llvm/test/Analysis/CostModel/AArch64/arith.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/arith.ll
@@ -128,6 +128,31 @@ define void @i64() {
   ret void
 }
 
+define void @i128() {
+; CHECK-LABEL: 'i128'
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %c = add i128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %d = sub i128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %e = mul i128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %f = ashr i128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %g = lshr i128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %h = shl i128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %i = and i128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %j = or i128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %k = xor i128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %c = add i128 undef, undef
+  %d = sub i128 undef, undef
+  %e = mul i128 undef, undef
+  %f = ashr i128 undef, undef
+  %g = lshr i128 undef, undef
+  %h = shl i128 undef, undef
+  %i = and i128 undef, undef
+  %j = or i128 undef, undef
+  %k = xor i128 undef, undef
+  ret void
+}
+
 
 define void @vi8() {
 ; CHECK-LABEL: 'vi8'
@@ -237,15 +262,15 @@ define void @vi16() {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %i8 = and <8 x i16> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %j8 = or <8 x i16> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %k8 = xor <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %c16 = add <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %d16 = sub <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %e16 = mul <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %f16 = ashr <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %g16 = lshr <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %h16 = shl <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %i16 = and <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %j16 = or <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %k16 = xor <16 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %c16 = add <16 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %d16 = sub <16 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %e16 = mul <16 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %f16 = ashr <16 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %g16 = lshr <16 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %h16 = shl <16 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %i16 = and <16 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %j16 = or <16 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %k16 = xor <16 x i16> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %c2 = add <2 x i16> undef, undef
@@ -307,24 +332,24 @@ define void @vi32() {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %i4 = and <4 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %j4 = or <4 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %k4 = xor <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %c8 = add <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %d8 = sub <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %e8 = mul <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %f8 = ashr <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %g8 = lshr <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %h8 = shl <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %i8 = and <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %j8 = or <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %k8 = xor <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %c16 = add <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %d16 = sub <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %e16 = mul <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %f16 = ashr <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %g16 = lshr <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %h16 = shl <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %i16 = and <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %j16 = or <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %k16 = xor <16 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %c8 = add <8 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %d8 = sub <8 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %e8 = mul <8 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %f8 = ashr <8 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %g8 = lshr <8 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %h8 = shl <8 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %i8 = and <8 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %j8 = or <8 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %k8 = xor <8 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %c16 = add <16 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %d16 = sub <16 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %e16 = mul <16 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %f16 = ashr <16 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %g16 = lshr <16 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %h16 = shl <16 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %i16 = and <16 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %j16 = or <16 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %k16 = xor <16 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %c2 = add <2 x i32> undef, undef
@@ -370,40 +395,40 @@ define void @vi64() {
 ; CHECK-LABEL: 'vi64'
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %c2 = add <2 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %d2 = sub <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %e2 = mul <2 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:8 Lat:14 SizeLat:14 for: %e2 = mul <2 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f2 = ashr <2 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %g2 = lshr <2 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %h2 = shl <2 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %i2 = and <2 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %j2 = or <2 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %k2 = xor <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %c4 = add <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %d4 = sub <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:1 Lat:1 SizeLat:1 for: %e4 = mul <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %f4 = ashr <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %g4 = lshr <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %h4 = shl <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %i4 = and <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %j4 = or <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %k4 = xor <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %c8 = add <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %d8 = sub <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:56 CodeSize:1 Lat:1 SizeLat:1 for: %e8 = mul <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %f8 = ashr <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %g8 = lshr <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %h8 = shl <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %i8 = and <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %j8 = or <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %k8 = xor <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %c16 = add <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %d16 = sub <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:112 CodeSize:1 Lat:1 SizeLat:1 for: %e16 = mul <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %f16 = ashr <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %g16 = lshr <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %h16 = shl <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %i16 = and <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %j16 = or <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %k16 = xor <16 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %c4 = add <4 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %d4 = sub <4 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:16 Lat:28 SizeLat:28 for: %e4 = mul <4 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %f4 = ashr <4 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %g4 = lshr <4 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %h4 = shl <4 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %i4 = and <4 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %j4 = or <4 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %k4 = xor <4 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %c8 = add <8 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %d8 = sub <8 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:56 CodeSize:32 Lat:56 SizeLat:56 for: %e8 = mul <8 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %f8 = ashr <8 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %g8 = lshr <8 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %h8 = shl <8 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %i8 = and <8 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %j8 = or <8 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %k8 = xor <8 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %c16 = add <16 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %d16 = sub <16 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:112 CodeSize:64 Lat:112 SizeLat:112 for: %e16 = mul <16 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %f16 = ashr <16 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %g16 = lshr <16 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %h16 = shl <16 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %i16 = and <16 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %j16 = or <16 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %k16 = xor <16 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %c2 = add <2 x i64> undef, undef
@@ -444,3 +469,28 @@ define void @vi64() {
   %k16 = xor <16 x i64> undef, undef
   ret void
 }
+
+define void @vi128() {
+; CHECK-LABEL: 'vi128'
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %c2 = add <2 x i128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %d2 = sub <2 x i128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %e2 = mul <2 x i128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %f2 = ashr <2 x i128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %g2 = lshr <2 x i128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %h2 = shl <2 x i128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %i2 = and <2 x i128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %j2 = or <2 x i128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %k2 = xor <2 x i128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %c2 = add <2 x i128> undef, undef
+  %d2 = sub <2 x i128> undef, undef
+  %e2 = mul <2 x i128> undef, undef
+  %f2 = ashr <2 x i128> undef, undef
+  %g2 = lshr <2 x i128> undef, undef
+  %h2 = shl <2 x i128> undef, undef
+  %i2 = and <2 x i128> undef, undef
+  %j2 = or <2 x i128> undef, undef
+  %k2 = xor <2 x i128> undef, undef
+  ret void
+}
diff --git a/llvm/test/Analysis/CostModel/AArch64/cmp.ll b/llvm/test/Analysis/CostModel/AArch64/cmp.ll
index f0e64b5d7b6a7..0770101481785 100644
--- a/llvm/test/Analysis/CostModel/AArch64/cmp.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/cmp.ll
@@ -61,7 +61,7 @@ define void @andcmp() {
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c64le = icmp slt i64 %a64, 1
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c64leneg = icmp sle i64 %a64, -1
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c64gtneg = icmp sgt i64 %a64, -1
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a128 = and i128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %a128 = and i128 undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 2 for: %c128 = icmp eq i128 %a128, 0
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %av16i8 = and <16 x i8> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %cv16i8 = icmp ne <16 x i8> %av16i8, zeroinitializer
diff --git a/llvm/test/Analysis/CostModel/AArch64/cttz_elts.ll b/llvm/test/Analysis/CostModel/AArch64/cttz_elts.ll
index 15d09e00a4ee5..729ca7f1a9ecf 100644
--- a/llvm/test/Analysis/CostModel/AArch64/cttz_elts.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/cttz_elts.ll
@@ -8,42 +8,42 @@ define void @foo_no_vscale_range() {
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i64.nxv4i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> undef, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i64.nxv8i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> undef, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i64.nxv16i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> undef, i1 true)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:96 CodeSize:37 Lat:37 SizeLat:37 for: %res.i64.nxv32i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 true)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:96 CodeSize:67 Lat:67 SizeLat:67 for: %res.i64.nxv32i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i32.nxv2i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> undef, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i32.nxv4i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> undef, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i32.nxv8i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> undef, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i32.nxv16i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> undef, i1 true)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:21 Lat:21 SizeLat:21 for: %res.i32.nxv32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 true)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:35 Lat:35 SizeLat:35 for: %res.i32.nxv32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i64.v2i1.zip = call i64 @llvm.experimental.cttz.elts.i64.v2i1(<2 x i1> undef, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i64.v4i1.zip = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> undef, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i64.v8i1.zip = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> undef, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i64.v16i1.zip = call i64 @llvm.experimental.cttz.elts.i64.v16i1(<16 x i1> undef, i1 true)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:9 Lat:9 SizeLat:9 for: %res.i64.v32i1.zip = call i64 @llvm.experimental.cttz.elts.i64.v32i1(<32 x i1> undef, i1 true)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:11 Lat:11 SizeLat:11 for: %res.i64.v32i1.zip = call i64 @llvm.experimental.cttz.elts.i64.v32i1(<32 x i1> undef, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i32.v2i1.zip = call i32 @llvm.experimental.cttz.elts.i32.v2i1(<2 x i1> undef, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i32.v4i1.zip = call i32 @llvm.experimental.cttz.elts.i32.v4i1(<4 x i1> undef, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i32.v8i1.zip = call i32 @llvm.experimental.cttz.elts.i32.v8i1(<8 x i1> undef, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i32.v16i1.zip = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> undef, i1 true)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:9 Lat:9 SizeLat:9 for: %res.i32.v32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.v32i1(<32 x i1> undef, i1 true)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:11 Lat:11 SizeLat:11 for: %res.i32.v32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.v32i1(<32 x i1> undef, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i64.nxv2i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i64.nxv4i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i64.nxv8i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i64.nxv16i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> undef, i1 false)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:96 CodeSize:37 Lat:37 SizeLat:37 for: %res.i64.nxv32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 false)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:96 CodeSize:67 Lat:67 SizeLat:67 for: %res.i64.nxv32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i32.nxv2i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i32.nxv4i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i32.nxv8i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i32.nxv16i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> undef, i1 false)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:21 Lat:21 SizeLat:21 for: %res.i32.nxv32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 false)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:35 Lat:35 SizeLat:35 for: %res.i32.nxv32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i64.v2i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.v2i1(<2 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i64.v4i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i64.v8i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i64.v16i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.v16i1(<16 x i1> undef, i1 false)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:9 Lat:9 SizeLat:9 for: %res.i64.v32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.v32i1(<32 x i1> undef, i1 false)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:11 Lat:11 SizeLat:11 for: %res.i64.v32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.v32i1(<32 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i32.v2i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.v2i1(<2 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i32.v4i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.v4i1(<4 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i32.v8i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.v8i1(<8 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i32.v16i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> undef, i1 false)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:9 Lat:9 SizeLat:9 for: %res.i32.v32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.v32i1(<32 x i1> undef, i1 false)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:11 Lat:11 SizeLat:11 for: %res.i32.v32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.v32i1(<32 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %res.i64.nxv1i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv1i1(<vscale x 1 x i1> undef, i1 true)
@@ -101,22 +101,22 @@ define void @foo_vscale_range_1_16() vscale_range(1,16) {
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i64.nxv4i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> undef, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i64.nxv8i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> undef, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i64.nxv16i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> undef, i1 true)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:13 Lat:13 SizeLat:13 for: %res.i64.nxv32i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 true)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:19 Lat:19 SizeLat:19 for: %res.i64.nxv32i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i32.nxv2i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> undef, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i32.nxv4i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> undef, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i32.nxv8i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> undef, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i32.nxv16i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> undef, i1 true)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:13 Lat:13 SizeLat:13 for: %res.i32.nxv32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 true)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:19 Lat:19 SizeLat:19 for: %res.i32.nxv32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i64.nxv2i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i64.nxv4i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i64.nxv8i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i64.nxv16i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> undef, i1 false)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:13 Lat:13 SizeLat:13 for: %res.i64.nxv32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 false)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:19 Lat:19 SizeLat:19 for: %res.i64.nxv32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i32.nxv2i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i32.nxv4i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i32.nxv8i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i32.nxv16i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> undef, i1 false)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:13 Lat:13 SizeLat:13 for: %res.i32.nxv32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 false)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:19 Lat:19 SizeLat:19 for: %res.i32.nxv32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %res.i64.nxv2i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> undef, i1 true)
@@ -150,22 +150,22 @@ define void @foo_vscale_range_1_16384() vscale_range(1,16384) {
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i64.nxv4i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> undef, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i64.nxv8i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> undef, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i64.nxv16i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> undef, i1 true)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:21 Lat:21 SizeLat:21 for: %res.i64.nxv32i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 true)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:35 Lat:35 SizeLat:35 for: %res.i64.nxv32i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i32.nxv2i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> undef, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i32.nxv4i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> undef, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i32.nxv8i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> undef, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i32.nxv16i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> undef, i1 true)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:21 Lat:21 SizeLat:21 for: %res.i32.nxv32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 true)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:35 Lat:35 SizeLat:35 for: %res.i32.nxv32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i64.nxv2i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i64.nxv4i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i64.nxv8i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i64.nxv16i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> undef, i1 false)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:21 Lat:21 SizeLat:21 for: %res.i64.nxv32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 false)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:35 Lat:35 SizeLat:35 for: %res.i64.nxv32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i32.nxv2i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i32.nxv4i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i32.nxv8i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %res.i32.nxv16i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> undef, i1 false)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:21 Lat:21 SizeLat:21 for: %res.i32.nxv32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 false)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:35 Lat:35 SizeLat:35 for: %res.i32.nxv32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %res.i64.nxv2i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> undef, i1 true)
diff --git a/llvm/test/Analysis/CostModel/AArch64/div.ll b/llvm/test/Analysis/CostModel/AArch64/div.ll
index 3a2358dba51b2..59bb8b900f617 100644
--- a/llvm/test/Analysis/CostModel/AArch64/div.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/div.ll
@@ -5,29 +5,29 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define void @sdiv() {
 ; CHECK-LABEL: 'sdiv'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %I128 = sdiv i128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %I128 = sdiv i128 undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %I64 = sdiv i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = sdiv <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = sdiv <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = sdiv <8 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:12 Lat:16 SizeLat:16 for: %V2i64 = sdiv <2 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:12 Lat:16 SizeLat:16 for: %V4i64 = sdiv <4 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:12 Lat:16 SizeLat:16 for: %V8i64 = sdiv <8 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %I32 = sdiv i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = sdiv <2 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = sdiv <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = sdiv <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:168 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = sdiv <16 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:12 Lat:16 SizeLat:16 for: %V2i32 = sdiv <2 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:12 Lat:16 SizeLat:16 for: %V4i32 = sdiv <4 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:12 Lat:16 SizeLat:16 for: %V8i32 = sdiv <8 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:168 CodeSize:12 Lat:16 SizeLat:16 for: %V16i32 = sdiv <16 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %I16 = sdiv i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = sdiv <2 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = sdiv <4 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = sdiv <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:168 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = sdiv <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:328 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = sdiv <32 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:12 Lat:16 SizeLat:16 for: %V2i16 = sdiv <2 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:12 Lat:16 SizeLat:16 for: %V4i16 = sdiv <4 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:12 Lat:16 SizeLat:16 for: %V8i16 = sdiv <8 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:168 CodeSize:12 Lat:16 SizeLat:16 for: %V16i16 = sdiv <16 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:328 CodeSize:12 Lat:16 SizeLat:16 for: %V32i16 = sdiv <32 x i16> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %I8 = sdiv i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = sdiv <2 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = sdiv <4 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = sdiv <8 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:168 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = sdiv <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:328 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = sdiv <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:648 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = sdiv <64 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:12 Lat:16 SizeLat:16 for: %V2i8 = sdiv <2 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:12 Lat:16 SizeLat:16 for: %V4i8 = sdiv <4 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:12 Lat:16 SizeLat:16 for: %V8i8 = sdiv <8 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:168 CodeSize:12 Lat:16 SizeLat:16 for: %V16i8 = sdiv <16 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:328 CodeSize:12 Lat:16 SizeLat:16 for: %V32i8 = sdiv <32 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:648 CodeSize:12 Lat:16 SizeLat:16 for: %V64i8 = sdiv <64 x i8> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %I128 = sdiv i128 undef, undef
@@ -63,29 +63,29 @@ define void @sdiv() {
 
 define void @udiv() {
 ; CHECK-LABEL: 'udiv'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %I128 = udiv i128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %I128 = udiv i128 undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %I64 = udiv i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = udiv <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = udiv <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = udiv <8 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:12 Lat:16 SizeLat:16 for: %V2i64 = udiv <2 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:12 Lat:16 SizeLat:16 for: %V4i64 = udiv <4 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:12 Lat:16 SizeLat:16 for: %V8i64 = udiv <8 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %I32 = udiv i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = udiv <2 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = udiv <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = udiv <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:168 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = udiv <16 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:12 Lat:16 SizeLat:16 for: %V2i32 = udiv <2 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:12 Lat:16 SizeLat:16 for: %V4i32 = udiv <4 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:12 Lat:16 SizeLat:16 for: %V8i32 = udiv <8 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:168 CodeSize:12 Lat:16 SizeLat:16 for: %V16i32 = udiv <16 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %I16 = udiv i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = udiv <2 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = udiv <4 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = udiv <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:168 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = udiv <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:328 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = udiv <32 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:12 Lat:16 SizeLat:16 for: %V2i16 = udiv <2 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:12 Lat:16 SizeLat:16 for: %V4i16 = udiv <4 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:12 Lat:16 SizeLat:16 for: %V8i16 = udiv <8 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:168 CodeSize:12 Lat:16 SizeLat:16 for: %V16i16 = udiv <16 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:328 CodeSize:12 Lat:16 SizeLat:16 for: %V32i16 = udiv <32 x i16> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %I8 = udiv i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = udiv <2 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = udiv <4 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = udiv <8 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:168 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = udiv <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:328 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = udiv <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:648 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = udiv <64 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:12 Lat:16 SizeLat:16 for: %V2i8 = udiv <2 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:12 Lat:16 SizeLat:16 for: %V4i8 = udiv <4 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:12 Lat:16 SizeLat:16 for: %V8i8 = udiv <8 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:168 CodeSize:12 Lat:16 SizeLat:16 for: %V16i8 = udiv <16 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:328 CodeSize:12 Lat:16 SizeLat:16 for: %V32i8 = udiv <32 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:648 CodeSize:12 Lat:16 SizeLat:16 for: %V64i8 = udiv <64 x i8> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %I128 = udiv i128 undef, undef
@@ -122,41 +122,41 @@ define void @udiv() {
 define void @sdiv_uniform() {
 ; CHECK-LABEL: 'sdiv_uniform'
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2i64_s = shufflevector <2 x i64> poison, <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = sdiv <2 x i64> undef, %V2i64_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:12 Lat:16 SizeLat:16 for: %V2i64 = sdiv <2 x i64> undef, %V2i64_s
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4i64_s = shufflevector <4 x i64> poison, <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = sdiv <4 x i64> undef, %V4i64_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:12 Lat:16 SizeLat:16 for: %V4i64 = sdiv <4 x i64> undef, %V4i64_s
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8i64_s = shufflevector <8 x i64> poison, <8 x i64> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = sdiv <8 x i64> undef, %V8i64_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:12 Lat:16 SizeLat:16 for: %V8i64 = sdiv <8 x i64> undef, %V8i64_s
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2i32_s = shufflevector <2 x i32> poison, <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = sdiv <2 x i32> undef, %V2i32_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:12 Lat:16 SizeLat:16 for: %V2i32 = sdiv <2 x i32> undef, %V2i32_s
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4i32_s = shufflevector <4 x i32> poison, <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = sdiv <4 x i32> undef, %V4i32_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:12 Lat:16 SizeLat:16 for: %V4i32 = sdiv <4 x i32> undef, %V4i32_s
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8i32_s = shufflevector <8 x i32> poison, <8 x i32> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = sdiv <8 x i32> undef, %V8i32_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:12 Lat:16 SizeLat:16 for: %V8i32 = sdiv <8 x i32> undef, %V8i32_s
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V16i32_s = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:168 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = sdiv <16 x i32> undef, %V16i32_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:168 CodeSize:12 Lat:16 SizeLat:16 for: %V16i32 = sdiv <16 x i32> undef, %V16i32_s
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2i16_s = shufflevector <2 x i16> poison, <2 x i16> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = sdiv <2 x i16> undef, %V2i16_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:12 Lat:16 SizeLat:16 for: %V2i16 = sdiv <2 x i16> undef, %V2i16_s
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4i16_s = shufflevector <4 x i16> poison, <4 x i16> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = sdiv <4 x i16> undef, %V4i16_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:12 Lat:16 SizeLat:16 for: %V4i16 = sdiv <4 x i16> undef, %V4i16_s
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8i16_s = shufflevector <8 x i16> poison, <8 x i16> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = sdiv <8 x i16> undef, %V8i16_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:12 Lat:16 SizeLat:16 for: %V8i16 = sdiv <8 x i16> undef, %V8i16_s
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V16i16_s = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:168 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = sdiv <16 x i16> undef, %V16i16_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:168 CodeSize:12 Lat:16 SizeLat:16 for: %V16i16 = sdiv <16 x i16> undef, %V16i16_s
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V32i16_s = shufflevector <32 x i16> poison, <32 x i16> poison, <32 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:328 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = sdiv <32 x i16> undef, %V32i16_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:328 CodeSize:12 Lat:16 SizeLat:16 for: %V32i16 = sdiv <32 x i16> undef, %V32i16_s
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2i8_s = shufflevector <2 x i8> poison, <2 x i8> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = sdiv <2 x i8> undef, %V2i8_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:12 Lat:16 SizeLat:16 for: %V2i8 = sdiv <2 x i8> undef, %V2i8_s
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4i8_s = shufflevector <4 x i8> poison, <4 x i8> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = sdiv <4 x i8> undef, %V4i8_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:12 Lat:16 SizeLat:16 for: %V4i8 = sdiv <4 x i8> undef, %V4i8_s
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8i8_s = shufflevector <8 x i8> poison, <8 x i8> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = sdiv <8 x i8> undef, %V8i8_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:12 Lat:16 SizeLat:16 for: %V8i8 = sdiv <8 x i8> undef, %V8i8_s
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V16i8_s = shufflevector <16 x i8> poison, <16 x i8> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:168 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = sdiv <16 x i8> undef, %V16i8_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:168 CodeSize:12 Lat:16 SizeLat:16 for: %V16i8 = sdiv <16 x i8> undef, %V16i8_s
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V32i8_s = shufflevector <32 x i8> poison, <32 x i8> poison, <32 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:328 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = sdiv <32 x i8> undef, %V32i8_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:328 CodeSize:12 Lat:16 SizeLat:16 for: %V32i8 = sdiv <32 x i8> undef, %V32i8_s
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V64i8_s = shufflevector <64 x i8> poison, <64 x i8> poison, <64 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:648 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = sdiv <64 x i8> undef, %V64i8_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:648 CodeSize:12 Lat:16 SizeLat:16 for: %V64i8 = sdiv <64 x i8> undef, %V64i8_s
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %V2i64_s = shufflevector <2 x i64> poison, <2 x i64> poison, <2 x i32> zeroinitializer
@@ -205,41 +205,41 @@ define void @sdiv_uniform() {
 define void @udiv_uniform() {
 ; CHECK-LABEL: 'udiv_uniform'
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2i64_s = shufflevector <2 x i64> poison, <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = udiv <2 x i64> undef, %V2i64_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:12 Lat:16 SizeLat:16 for: %V2i64 = udiv <2 x i64> undef, %V2i64_s
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4i64_s = shufflevector <4 x i64> poison, <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = udiv <4 x i64> undef, %V4i64_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:12 Lat:16 SizeLat:16 for: %V4i64 = udiv <4 x i64> undef, %V4i64_s
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8i64_s = shufflevector <8 x i64> poison, <8 x i64> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = udiv <8 x i64> undef, %V8i64_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:12 Lat:16 SizeLat:16 for: %V8i64 = udiv <8 x i64> undef, %V8i64_s
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2i32_s = shufflevector <2 x i32> poison, <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = udiv <2 x i32> undef, %V2i32_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:12 Lat:16 SizeLat:16 for: %V2i32 = udiv <2 x i32> undef, %V2i32_s
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4i32_s = shufflevector <4 x i32> poison, <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = udiv <4 x i32> undef, %V4i32_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:12 Lat:16 SizeLat:16 for: %V4i32 = udiv <4 x i32> undef, %V4i32_s
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8i32_s = shufflevector <8 x i32> poison, <8 x i32> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = udiv <8 x i32> undef, %V8i32_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:12 Lat:16 SizeLat:16 for: %V8i32 = udiv <8 x i32> undef, %V8i32_s
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V16i32_s = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:168 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = udiv <16 x i32> undef, %V16i32_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:168 CodeSize:12 Lat:16 SizeLat:16 for: %V16i32 = udiv <16 x i32> undef, %V16i32_s
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2i16_s = shufflevector <2 x i16> poison, <2 x i16> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = udiv <2 x i16> undef, %V2i16_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:12 Lat:16 SizeLat:16 for: %V2i16 = udiv <2 x i16> undef, %V2i16_s
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4i16_s = shufflevector <4 x i16> poison, <4 x i16> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = udiv <4 x i16> undef, %V4i16_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:12 Lat:16 SizeLat:16 for: %V4i16 = udiv <4 x i16> undef, %V4i16_s
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8i16_s = shufflevector <8 x i16> poison, <8 x i16> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = udiv <8 x i16> undef, %V8i16_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:12 Lat:16 SizeLat:16 for: %V8i16 = udiv <8 x i16> undef, %V8i16_s
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V16i16_s = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:168 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = udiv <16 x i16> undef, %V16i16_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:168 CodeSize:12 Lat:16 SizeLat:16 for: %V16i16 = udiv <16 x i16> undef, %V16i16_s
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V32i16_s = shufflevector <32 x i16> poison, <32 x i16> poison, <32 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:328 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = udiv <32 x i16> undef, %V32i16_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:328 CodeSize:12 Lat:16 SizeLat:16 for: %V32i16 = udiv <32 x i16> undef, %V32i16_s
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2i8_s = shufflevector <2 x i8> poison, <2 x i8> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = udiv <2 x i8> undef, %V2i8_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:12 Lat:16 SizeLat:16 for: %V2i8 = udiv <2 x i8> undef, %V2i8_s
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4i8_s = shufflevector <4 x i8> poison, <4 x i8> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = udiv <4 x i8> undef, %V4i8_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:12 Lat:16 SizeLat:16 for: %V4i8 = udiv <4 x i8> undef, %V4i8_s
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8i8_s = shufflevector <8 x i8> poison, <8 x i8> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = udiv <8 x i8> undef, %V8i8_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:12 Lat:16 SizeLat:16 for: %V8i8 = udiv <8 x i8> undef, %V8i8_s
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V16i8_s = shufflevector <16 x i8> poison, <16 x i8> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:168 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = udiv <16 x i8> undef, %V16i8_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:168 CodeSize:12 Lat:16 SizeLat:16 for: %V16i8 = udiv <16 x i8> undef, %V16i8_s
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V32i8_s = shufflevector <32 x i8> poison, <32 x i8> poison, <32 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:328 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = udiv <32 x i8> undef, %V32i8_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:328 CodeSize:12 Lat:16 SizeLat:16 for: %V32i8 = udiv <32 x i8> undef, %V32i8_s
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V64i8_s = shufflevector <64 x i8> poison, <64 x i8> poison, <64 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:648 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = udiv <64 x i8> undef, %V64i8_s
+; CHECK-NEXT:  Cost Model: Found costs of RThru:648 CodeSize:12 Lat:16 SizeLat:16 for: %V64i8 = udiv <64 x i8> undef, %V64i8_s
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %V2i64_s = shufflevector <2 x i64> poison, <2 x i64> poison, <2 x i32> zeroinitializer
@@ -287,29 +287,29 @@ define void @udiv_uniform() {
 
 define void @sdiv_const() {
 ; CHECK-LABEL: 'sdiv_const'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %I128 = sdiv i128 undef, 7
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %I128 = sdiv i128 undef, 7
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I64 = sdiv i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = sdiv <2 x i64> undef, <i64 6, i64 7>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = sdiv <4 x i64> undef, <i64 4, i64 5, i64 6, i64 7>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = sdiv <8 x i64> undef, <i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:11 Lat:14 SizeLat:14 for: %V2i64 = sdiv <2 x i64> undef, <i64 6, i64 7>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:19 Lat:22 SizeLat:22 for: %V4i64 = sdiv <4 x i64> undef, <i64 4, i64 5, i64 6, i64 7>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:35 Lat:38 SizeLat:38 for: %V8i64 = sdiv <8 x i64> undef, <i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11>
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I32 = sdiv i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = sdiv <2 x i32> undef, <i32 4, i32 5>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = sdiv <4 x i32> undef, <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = sdiv <8 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = sdiv <16 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:11 Lat:14 SizeLat:14 for: %V2i32 = sdiv <2 x i32> undef, <i32 4, i32 5>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:19 Lat:22 SizeLat:22 for: %V4i32 = sdiv <4 x i32> undef, <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:35 Lat:38 SizeLat:38 for: %V8i32 = sdiv <8 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:67 Lat:70 SizeLat:70 for: %V16i32 = sdiv <16 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I16 = sdiv i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = sdiv <2 x i16> undef, <i16 4, i16 5>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = sdiv <4 x i16> undef, <i16 4, i16 5, i16 6, i16 7>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = sdiv <8 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = sdiv <16 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = sdiv <32 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:11 Lat:14 SizeLat:14 for: %V2i16 = sdiv <2 x i16> undef, <i16 4, i16 5>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:19 Lat:22 SizeLat:22 for: %V4i16 = sdiv <4 x i16> undef, <i16 4, i16 5, i16 6, i16 7>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:35 Lat:38 SizeLat:38 for: %V8i16 = sdiv <8 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:67 Lat:70 SizeLat:70 for: %V16i16 = sdiv <16 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:131 Lat:134 SizeLat:134 for: %V32i16 = sdiv <32 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I8 = sdiv i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = sdiv <2 x i8> undef, <i8 4, i8 5>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = sdiv <4 x i8> undef, <i8 4, i8 5, i8 6, i8 7>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = sdiv <8 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = sdiv <16 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = sdiv <32 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:70 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = sdiv <64 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:11 Lat:14 SizeLat:14 for: %V2i8 = sdiv <2 x i8> undef, <i8 4, i8 5>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:19 Lat:22 SizeLat:22 for: %V4i8 = sdiv <4 x i8> undef, <i8 4, i8 5, i8 6, i8 7>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:35 Lat:38 SizeLat:38 for: %V8i8 = sdiv <8 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:67 Lat:70 SizeLat:70 for: %V16i8 = sdiv <16 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:131 Lat:134 SizeLat:134 for: %V32i8 = sdiv <32 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:70 CodeSize:259 Lat:262 SizeLat:262 for: %V64i8 = sdiv <64 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %I128 = sdiv i128 undef, 7
@@ -345,29 +345,29 @@ define void @sdiv_const() {
 
 define void @udiv_const() {
 ; CHECK-LABEL: 'udiv_const'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %I128 = udiv i128 undef, 7
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %I128 = udiv i128 undef, 7
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I64 = udiv i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = udiv <2 x i64> undef, <i64 6, i64 7>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = udiv <4 x i64> undef, <i64 4, i64 5, i64 6, i64 7>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = udiv <8 x i64> undef, <i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %I32 = udiv i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = udiv <2 x i32> undef, <i32 4, i32 5>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = udiv <4 x i32> undef, <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = udiv <8 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = udiv <16 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %I16 = udiv i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = udiv <2 x i16> undef, <i16 4, i16 5>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = udiv <4 x i16> undef, <i16 4, i16 5, i16 6, i16 7>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = udiv <8 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = udiv <16 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = udiv <32 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %I8 = udiv i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = udiv <2 x i8> undef, <i8 4, i8 5>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = udiv <4 x i8> undef, <i8 4, i8 5, i8 6, i8 7>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = udiv <8 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = udiv <16 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = udiv <32 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = udiv <64 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:20 Lat:32 SizeLat:32 for: %V2i64 = udiv <2 x i64> undef, <i64 6, i64 7>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:40 Lat:64 SizeLat:64 for: %V4i64 = udiv <4 x i64> undef, <i64 4, i64 5, i64 6, i64 7>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:80 Lat:128 SizeLat:128 for: %V8i64 = udiv <8 x i64> undef, <i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11>
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %I32 = udiv i32 undef, 7
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V2i32 = udiv <2 x i32> undef, <i32 4, i32 5>
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V4i32 = udiv <4 x i32> undef, <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V8i32 = udiv <8 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %V16i32 = udiv <16 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %I16 = udiv i16 undef, 7
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V2i16 = udiv <2 x i16> undef, <i16 4, i16 5>
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V4i16 = udiv <4 x i16> undef, <i16 4, i16 5, i16 6, i16 7>
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V8i16 = udiv <8 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V16i16 = udiv <16 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %V32i16 = udiv <32 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %I8 = udiv i8 undef, 7
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V2i8 = udiv <2 x i8> undef, <i8 4, i8 5>
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V4i8 = udiv <4 x i8> undef, <i8 4, i8 5, i8 6, i8 7>
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V8i8 = udiv <8 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11>
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V16i8 = udiv <16 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V32i8 = udiv <32 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %V64i8 = udiv <64 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 
@@ -404,29 +404,29 @@ define void @udiv_const() {
 
 define void @sdiv_uniformconst() {
 ; CHECK-LABEL: 'sdiv_uniformconst'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %I128 = sdiv i128 undef, 7
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %I128 = sdiv i128 undef, 7
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I64 = sdiv i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = sdiv <2 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = sdiv <4 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = sdiv <8 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V2i64 = sdiv <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V4i64 = sdiv <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of 32 for: %V8i64 = sdiv <8 x i64> undef, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I32 = sdiv i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = sdiv <2 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = sdiv <4 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = sdiv <8 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = sdiv <16 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V2i32 = sdiv <2 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V4i32 = sdiv <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V8i32 = sdiv <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %V16i32 = sdiv <16 x i32> undef, splat (i32 7)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I16 = sdiv i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = sdiv <2 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = sdiv <4 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = sdiv <8 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = sdiv <16 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = sdiv <32 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V2i16 = sdiv <2 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V4i16 = sdiv <4 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V8i16 = sdiv <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V16i16 = sdiv <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %V32i16 = sdiv <32 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I8 = sdiv i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = sdiv <2 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = sdiv <4 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = sdiv <8 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = sdiv <16 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = sdiv <32 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = sdiv <64 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V2i8 = sdiv <2 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V4i8 = sdiv <4 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V8i8 = sdiv <8 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V16i8 = sdiv <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V32i8 = sdiv <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %V64i8 = sdiv <64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %I128 = sdiv i128 undef, 7
@@ -462,29 +462,29 @@ define void @sdiv_uniformconst() {
 
 define void @udiv_uniformconst() {
 ; CHECK-LABEL: 'udiv_uniformconst'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %I128 = udiv i128 undef, 7
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %I128 = udiv i128 undef, 7
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I64 = udiv i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = udiv <2 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = udiv <4 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = udiv <8 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %I32 = udiv i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = udiv <2 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = udiv <4 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = udiv <8 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = udiv <16 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %I16 = udiv i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = udiv <2 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = udiv <4 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = udiv <8 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = udiv <16 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = udiv <32 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %I8 = udiv i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = udiv <2 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = udiv <4 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = udiv <8 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = udiv <16 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = udiv <32 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = udiv <64 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:20 Lat:32 SizeLat:32 for: %V2i64 = udiv <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:40 Lat:64 SizeLat:64 for: %V4i64 = udiv <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:80 Lat:128 SizeLat:128 for: %V8i64 = udiv <8 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %I32 = udiv i32 undef, 7
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V2i32 = udiv <2 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V4i32 = udiv <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V8i32 = udiv <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %V16i32 = udiv <16 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %I16 = udiv i16 undef, 7
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V2i16 = udiv <2 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V4i16 = udiv <4 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V8i16 = udiv <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V16i16 = udiv <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %V32i16 = udiv <32 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %I8 = udiv i8 undef, 7
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V2i8 = udiv <2 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V4i8 = udiv <4 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V8i8 = udiv <8 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V16i8 = udiv <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V32i8 = udiv <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %V64i8 = udiv <64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %I128 = udiv i128 undef, 7
@@ -520,29 +520,29 @@ define void @udiv_uniformconst() {
 
 define void @sdiv_constpow2() {
 ; CHECK-LABEL: 'sdiv_constpow2'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %I128 = sdiv i128 undef, 16
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %I128 = sdiv i128 undef, 16
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I64 = sdiv i64 undef, 16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = sdiv <2 x i64> undef, <i64 8, i64 16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = sdiv <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = sdiv <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:11 Lat:14 SizeLat:14 for: %V2i64 = sdiv <2 x i64> undef, <i64 8, i64 16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:19 Lat:22 SizeLat:22 for: %V4i64 = sdiv <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:35 Lat:38 SizeLat:38 for: %V8i64 = sdiv <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I32 = sdiv i32 undef, 16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = sdiv <2 x i32> undef, <i32 2, i32 4>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = sdiv <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = sdiv <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = sdiv <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:11 Lat:14 SizeLat:14 for: %V2i32 = sdiv <2 x i32> undef, <i32 2, i32 4>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:19 Lat:22 SizeLat:22 for: %V4i32 = sdiv <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:35 Lat:38 SizeLat:38 for: %V8i32 = sdiv <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:67 Lat:70 SizeLat:70 for: %V16i32 = sdiv <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I16 = sdiv i16 undef, 16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = sdiv <2 x i16> undef, <i16 2, i16 4>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = sdiv <4 x i16> undef, <i16 2, i16 4, i16 8, i16 16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = sdiv <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = sdiv <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = sdiv <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:11 Lat:14 SizeLat:14 for: %V2i16 = sdiv <2 x i16> undef, <i16 2, i16 4>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:19 Lat:22 SizeLat:22 for: %V4i16 = sdiv <4 x i16> undef, <i16 2, i16 4, i16 8, i16 16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:35 Lat:38 SizeLat:38 for: %V8i16 = sdiv <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:67 Lat:70 SizeLat:70 for: %V16i16 = sdiv <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:131 Lat:134 SizeLat:134 for: %V32i16 = sdiv <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I8 = sdiv i8 undef, 16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = sdiv <2 x i8> undef, <i8 2, i8 4>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = sdiv <4 x i8> undef, <i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = sdiv <8 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = sdiv <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = sdiv <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:70 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = sdiv <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:11 Lat:14 SizeLat:14 for: %V2i8 = sdiv <2 x i8> undef, <i8 2, i8 4>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:19 Lat:22 SizeLat:22 for: %V4i8 = sdiv <4 x i8> undef, <i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:35 Lat:38 SizeLat:38 for: %V8i8 = sdiv <8 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:67 Lat:70 SizeLat:70 for: %V16i8 = sdiv <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:131 Lat:134 SizeLat:134 for: %V32i8 = sdiv <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:70 CodeSize:259 Lat:262 SizeLat:262 for: %V64i8 = sdiv <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %I128 = sdiv i128 undef, 16
@@ -578,28 +578,28 @@ define void @sdiv_constpow2() {
 
 define void @udiv_constpow2() {
 ; CHECK-LABEL: 'udiv_constpow2'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %I128 = udiv i128 undef, 16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %I64 = udiv i64 undef, 16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = udiv <2 x i64> undef, <i64 8, i64 16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = udiv <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %I128 = udiv i128 undef, 16
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I64 = udiv i64 undef, 16
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2i64 = udiv <2 x i64> undef, <i64 8, i64 16>
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V4i64 = udiv <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8i64 = udiv <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %I32 = udiv i32 undef, 16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = udiv <2 x i32> undef, <i32 2, i32 4>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = udiv <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = udiv <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I32 = udiv i32 undef, 16
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2i32 = udiv <2 x i32> undef, <i32 2, i32 4>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4i32 = udiv <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V8i32 = udiv <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V16i32 = udiv <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %I16 = udiv i16 undef, 16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = udiv <2 x i16> undef, <i16 2, i16 4>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = udiv <4 x i16> undef, <i16 2, i16 4, i16 8, i16 16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = udiv <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = udiv <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I16 = udiv i16 undef, 16
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2i16 = udiv <2 x i16> undef, <i16 2, i16 4>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4i16 = udiv <4 x i16> undef, <i16 2, i16 4, i16 8, i16 16>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8i16 = udiv <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V16i16 = udiv <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V32i16 = udiv <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %I8 = udiv i8 undef, 16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = udiv <2 x i8> undef, <i8 2, i8 4>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = udiv <4 x i8> undef, <i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = udiv <8 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = udiv <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = udiv <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I8 = udiv i8 undef, 16
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2i8 = udiv <2 x i8> undef, <i8 2, i8 4>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4i8 = udiv <4 x i8> undef, <i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8i8 = udiv <8 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V16i8 = udiv <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V32i8 = udiv <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V64i8 = udiv <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
@@ -636,29 +636,29 @@ define void @udiv_constpow2() {
 
 define void @sdiv_uniformconstpow2() {
 ; CHECK-LABEL: 'sdiv_uniformconstpow2'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %I128 = sdiv i128 undef, 16
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %I128 = sdiv i128 undef, 16
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I64 = sdiv i64 undef, 16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found costs of 3 for: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I32 = sdiv i32 undef, 16
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2i32 = sdiv <2 x i32> undef, splat (i32 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I16 = sdiv i16 undef, 16
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2i16 = sdiv <2 x i16> undef, splat (i16 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4i16 = sdiv <4 x i16> undef, splat (i16 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I8 = sdiv i8 undef, 16
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2i8 = sdiv <2 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4i8 = sdiv <4 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8i8 = sdiv <8 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %I128 = sdiv i128 undef, 16
@@ -694,28 +694,28 @@ define void @sdiv_uniformconstpow2() {
 
 define void @udiv_uniformconstpow2() {
 ; CHECK-LABEL: 'udiv_uniformconstpow2'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %I128 = udiv i128 undef, 16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %I64 = udiv i64 undef, 16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %I128 = udiv i128 undef, 16
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I64 = udiv i64 undef, 16
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %I32 = udiv i32 undef, 16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = udiv <2 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I32 = udiv i32 undef, 16
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2i32 = udiv <2 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %I16 = udiv i16 undef, 16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = udiv <2 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = udiv <4 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I16 = udiv i16 undef, 16
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2i16 = udiv <2 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4i16 = udiv <4 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %I8 = udiv i8 undef, 16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = udiv <2 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = udiv <4 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = udiv <8 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I8 = udiv i8 undef, 16
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2i8 = udiv <2 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4i8 = udiv <4 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8i8 = udiv <8 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
@@ -752,29 +752,29 @@ define void @udiv_uniformconstpow2() {
 
 define void @sdiv_constnegpow2() {
 ; CHECK-LABEL: 'sdiv_constnegpow2'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %I128 = sdiv i128 undef, -16
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %I128 = sdiv i128 undef, -16
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I64 = sdiv i64 undef, -16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = sdiv <2 x i64> undef, <i64 -8, i64 -16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = sdiv <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = sdiv <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:11 Lat:14 SizeLat:14 for: %V2i64 = sdiv <2 x i64> undef, <i64 -8, i64 -16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:19 Lat:22 SizeLat:22 for: %V4i64 = sdiv <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:35 Lat:38 SizeLat:38 for: %V8i64 = sdiv <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I32 = sdiv i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = sdiv <2 x i32> undef, <i32 -2, i32 -4>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = sdiv <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = sdiv <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = sdiv <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:11 Lat:14 SizeLat:14 for: %V2i32 = sdiv <2 x i32> undef, <i32 -2, i32 -4>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:19 Lat:22 SizeLat:22 for: %V4i32 = sdiv <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:35 Lat:38 SizeLat:38 for: %V8i32 = sdiv <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:67 Lat:70 SizeLat:70 for: %V16i32 = sdiv <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I16 = sdiv i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = sdiv <2 x i16> undef, <i16 -2, i16 -4>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = sdiv <4 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = sdiv <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = sdiv <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = sdiv <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:11 Lat:14 SizeLat:14 for: %V2i16 = sdiv <2 x i16> undef, <i16 -2, i16 -4>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:19 Lat:22 SizeLat:22 for: %V4i16 = sdiv <4 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:35 Lat:38 SizeLat:38 for: %V8i16 = sdiv <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:67 Lat:70 SizeLat:70 for: %V16i16 = sdiv <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:131 Lat:134 SizeLat:134 for: %V32i16 = sdiv <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I8 = sdiv i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = sdiv <2 x i8> undef, <i8 -2, i8 -4>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = sdiv <4 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = sdiv <8 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = sdiv <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = sdiv <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:70 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = sdiv <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:11 Lat:14 SizeLat:14 for: %V2i8 = sdiv <2 x i8> undef, <i8 -2, i8 -4>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:19 Lat:22 SizeLat:22 for: %V4i8 = sdiv <4 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:35 Lat:38 SizeLat:38 for: %V8i8 = sdiv <8 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:67 Lat:70 SizeLat:70 for: %V16i8 = sdiv <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:131 Lat:134 SizeLat:134 for: %V32i8 = sdiv <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:70 CodeSize:259 Lat:262 SizeLat:262 for: %V64i8 = sdiv <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %I128 = sdiv i128 undef, -16
@@ -810,29 +810,29 @@ define void @sdiv_constnegpow2() {
 
 define void @udiv_constnegpow2() {
 ; CHECK-LABEL: 'udiv_constnegpow2'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %I128 = udiv i128 undef, -16
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %I128 = udiv i128 undef, -16
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I64 = udiv i64 undef, -16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = udiv <2 x i64> undef, <i64 -8, i64 -16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = udiv <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = udiv <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %I32 = udiv i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = udiv <2 x i32> undef, <i32 -2, i32 -4>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = udiv <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = udiv <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = udiv <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %I16 = udiv i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = udiv <2 x i16> undef, <i16 -2, i16 -4>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = udiv <4 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = udiv <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = udiv <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = udiv <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %I8 = udiv i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = udiv <2 x i8> undef, <i8 -2, i8 -4>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = udiv <4 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = udiv <8 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = udiv <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = udiv <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = udiv <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:20 Lat:32 SizeLat:32 for: %V2i64 = udiv <2 x i64> undef, <i64 -8, i64 -16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:40 Lat:64 SizeLat:64 for: %V4i64 = udiv <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:80 Lat:128 SizeLat:128 for: %V8i64 = udiv <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %I32 = udiv i32 undef, -16
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V2i32 = udiv <2 x i32> undef, <i32 -2, i32 -4>
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V4i32 = udiv <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V8i32 = udiv <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %V16i32 = udiv <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %I16 = udiv i16 undef, -16
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V2i16 = udiv <2 x i16> undef, <i16 -2, i16 -4>
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V4i16 = udiv <4 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16>
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V8i16 = udiv <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V16i16 = udiv <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %V32i16 = udiv <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %I8 = udiv i8 undef, -16
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V2i8 = udiv <2 x i8> undef, <i8 -2, i8 -4>
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V4i8 = udiv <4 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V8i8 = udiv <8 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V16i8 = udiv <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V32i8 = udiv <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %V64i8 = udiv <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %I128 = udiv i128 undef, -16
@@ -868,29 +868,29 @@ define void @udiv_constnegpow2() {
 
 define void @sdiv_uniformconstnegpow2() {
 ; CHECK-LABEL: 'sdiv_uniformconstnegpow2'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %I128 = sdiv i128 undef, -16
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %I128 = sdiv i128 undef, -16
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I64 = sdiv i64 undef, -16
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I32 = sdiv i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = sdiv <2 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V2i32 = sdiv <2 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 20 for: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I16 = sdiv i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = sdiv <2 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = sdiv <4 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V2i16 = sdiv <2 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V4i16 = sdiv <4 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 20 for: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I8 = sdiv i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = sdiv <2 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = sdiv <4 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = sdiv <8 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V2i8 = sdiv <2 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V4i8 = sdiv <4 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V8i8 = sdiv <8 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 20 for: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %I128 = sdiv i128 undef, -16
@@ -926,29 +926,29 @@ define void @sdiv_uniformconstnegpow2() {
 
 define void @udiv_uniformconstnegpow2() {
 ; CHECK-LABEL: 'udiv_uniformconstnegpow2'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %I128 = udiv i128 undef, -16
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %I128 = udiv i128 undef, -16
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I64 = udiv i64 undef, -16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = udiv <2 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = udiv <4 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = udiv <8 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %I32 = udiv i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = udiv <2 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = udiv <4 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = udiv <8 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = udiv <16 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %I16 = udiv i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = udiv <2 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = udiv <4 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = udiv <8 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = udiv <16 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = udiv <32 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %I8 = udiv i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = udiv <2 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = udiv <4 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = udiv <8 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = udiv <16 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = udiv <32 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = udiv <64 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:20 Lat:32 SizeLat:32 for: %V2i64 = udiv <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:40 Lat:64 SizeLat:64 for: %V4i64 = udiv <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:80 Lat:128 SizeLat:128 for: %V8i64 = udiv <8 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %I32 = udiv i32 undef, -16
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V2i32 = udiv <2 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V4i32 = udiv <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V8i32 = udiv <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %V16i32 = udiv <16 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %I16 = udiv i16 undef, -16
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V2i16 = udiv <2 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V4i16 = udiv <4 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V8i16 = udiv <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V16i16 = udiv <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %V32i16 = udiv <32 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %I8 = udiv i8 undef, -16
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V2i8 = udiv <2 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V4i8 = udiv <4 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V8i8 = udiv <8 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V16i8 = udiv <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V32i8 = udiv <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %V64i8 = udiv <64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %I128 = udiv i128 undef, -16
diff --git a/llvm/test/Analysis/CostModel/AArch64/div_cte.ll b/llvm/test/Analysis/CostModel/AArch64/div_cte.ll
index 87e8fdb54ff15..0a1a33966fbaa 100644
--- a/llvm/test/Analysis/CostModel/AArch64/div_cte.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/div_cte.ll
@@ -7,7 +7,7 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define <16 x i8> @sdiv8xi16(<16 x i8> %x) {
 ; CHECK-LABEL: 'sdiv8xi16'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %div = sdiv <16 x i8> %x, splat (i8 9)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %div = sdiv <16 x i8> %x, splat (i8 9)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %div
 ;
   %div = sdiv <16 x i8> %x, <i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9>
@@ -16,7 +16,7 @@ define <16 x i8> @sdiv8xi16(<16 x i8> %x) {
 
 define <8 x i16> @sdiv16xi8(<8 x i16> %x) {
 ; CHECK-LABEL: 'sdiv16xi8'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %div = sdiv <8 x i16> %x, splat (i16 9)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %div = sdiv <8 x i16> %x, splat (i16 9)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %div
 ;
   %div = sdiv <8 x i16> %x, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
@@ -25,7 +25,7 @@ define <8 x i16> @sdiv16xi8(<8 x i16> %x) {
 
 define <4 x i32> @sdiv32xi4(<4 x i32> %x) {
 ; CHECK-LABEL: 'sdiv32xi4'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %div = sdiv <4 x i32> %x, splat (i32 9)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %div = sdiv <4 x i32> %x, splat (i32 9)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %div
 ;
   %div = sdiv <4 x i32> %x, <i32 9, i32 9, i32 9, i32 9>
@@ -34,7 +34,7 @@ define <4 x i32> @sdiv32xi4(<4 x i32> %x) {
 
 define <16 x i8> @udiv8xi16(<16 x i8> %x) {
 ; CHECK-LABEL: 'udiv8xi16'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %div = udiv <16 x i8> %x, splat (i8 9)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %div = udiv <16 x i8> %x, splat (i8 9)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %div
 ;
   %div = udiv <16 x i8> %x, <i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9>
@@ -43,7 +43,7 @@ define <16 x i8> @udiv8xi16(<16 x i8> %x) {
 
 define <8 x i16> @udiv16xi8(<8 x i16> %x) {
 ; CHECK-LABEL: 'udiv16xi8'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %div = udiv <8 x i16> %x, splat (i16 9)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %div = udiv <8 x i16> %x, splat (i16 9)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %div
 ;
   %div = udiv <8 x i16> %x, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
@@ -52,7 +52,7 @@ define <8 x i16> @udiv16xi8(<8 x i16> %x) {
 
 define <4 x i32> @udiv32xi4(<4 x i32> %x) {
 ; CHECK-LABEL: 'udiv32xi4'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %div = udiv <4 x i32> %x, splat (i32 9)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %div = udiv <4 x i32> %x, splat (i32 9)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %div
 ;
   %div = udiv <4 x i32> %x, <i32 9, i32 9, i32 9, i32 9>
diff --git a/llvm/test/Analysis/CostModel/AArch64/fshl.ll b/llvm/test/Analysis/CostModel/AArch64/fshl.ll
index 9d06b4bdec9b4..615a75744a3d0 100644
--- a/llvm/test/Analysis/CostModel/AArch64/fshl.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/fshl.ll
@@ -224,7 +224,7 @@ declare <2 x i64> @llvm.fshl.v4i64(<2 x i64>, <2 x i64>, <2 x i64>)
 
 define <4 x i30> @fshl_v4i30_3rd_arg_var(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c) {
 ; CHECK-LABEL: 'fshl_v4i30_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:10 Lat:10 SizeLat:10 for: %fshl = tail call <4 x i30> @llvm.fshl.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c)
+; CHECK-NEXT:  Cost Model: Found costs of 14 for: %fshl = tail call <4 x i30> @llvm.fshl.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i30> %fshl
 ;
 entry:
@@ -236,7 +236,7 @@ declare <4 x i30> @llvm.fshl.v4i30(<4 x i30>, <4 x i30>, <4 x i30>)
 
 define <2 x i66> @fshl_v2i66_3rd_arg_vec_const_lanes_different(<2 x i66> %a, <2 x i66> %b) {
 ; CHECK-LABEL: 'fshl_v2i66_3rd_arg_vec_const_lanes_different'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %fshl = tail call <2 x i66> @llvm.fshl.v2i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> <i66 1, i66 2>)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:28 Lat:32 SizeLat:32 for: %fshl = tail call <2 x i66> @llvm.fshl.v2i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> <i66 1, i66 2>)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i66> %fshl
 ;
 entry:
@@ -259,7 +259,7 @@ declare i66 @llvm.fshl.i66(i66, i66, i66)
 
 define <2 x i128> @fshl_v2i128_3rd_arg_vec_const_lanes_different(<2 x i128> %a, <2 x i128> %b) {
 ; CHECK-LABEL: 'fshl_v2i128_3rd_arg_vec_const_lanes_different'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %fshl = tail call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 2>)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:28 Lat:32 SizeLat:32 for: %fshl = tail call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 2>)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %fshl
 ;
 entry:
@@ -270,7 +270,7 @@ declare <2 x i128> @llvm.fshl.v4i128(<2 x i128>, <2 x i128>, <2 x i128>)
 
 define i128 @fshl_i128(i128 %a, i128 %b) {
 ; CHECK-LABEL: 'fshl_i128'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:8 Lat:8 SizeLat:8 for: %fshl = tail call i128 @llvm.fshl.i128(i128 %a, i128 %b, i128 9)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %fshl = tail call i128 @llvm.fshl.i128(i128 %a, i128 %b, i128 9)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %fshl
 ;
 entry:
diff --git a/llvm/test/Analysis/CostModel/AArch64/fshr.ll b/llvm/test/Analysis/CostModel/AArch64/fshr.ll
index b31806b647868..aef36e1a2432e 100644
--- a/llvm/test/Analysis/CostModel/AArch64/fshr.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/fshr.ll
@@ -224,7 +224,7 @@ declare <2 x i64> @llvm.fshr.v4i64(<2 x i64>, <2 x i64>, <2 x i64>)
 
 define <4 x i30> @fshr_v4i30_3rd_arg_var(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c) {
 ; CHECK-LABEL: 'fshr_v4i30_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:10 Lat:10 SizeLat:10 for: %fshr = tail call <4 x i30> @llvm.fshr.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c)
+; CHECK-NEXT:  Cost Model: Found costs of 14 for: %fshr = tail call <4 x i30> @llvm.fshr.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i30> %fshr
 ;
 entry:
@@ -236,7 +236,7 @@ declare <4 x i30> @llvm.fshr.v4i30(<4 x i30>, <4 x i30>, <4 x i30>)
 
 define <2 x i66> @fshr_v2i66_3rd_arg_vec_const_lanes_different(<2 x i66> %a, <2 x i66> %b) {
 ; CHECK-LABEL: 'fshr_v2i66_3rd_arg_vec_const_lanes_different'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %fshr = tail call <2 x i66> @llvm.fshr.v2i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> <i66 1, i66 2>)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:28 Lat:32 SizeLat:32 for: %fshr = tail call <2 x i66> @llvm.fshr.v2i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> <i66 1, i66 2>)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i66> %fshr
 ;
 entry:
@@ -259,7 +259,7 @@ declare i66 @llvm.fshr.i66(i66, i66, i66)
 
 define <2 x i128> @fshr_v2i128_3rd_arg_vec_const_lanes_different(<2 x i128> %a, <2 x i128> %b) {
 ; CHECK-LABEL: 'fshr_v2i128_3rd_arg_vec_const_lanes_different'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %fshr = tail call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 2>)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:28 Lat:32 SizeLat:32 for: %fshr = tail call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 2>)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %fshr
 ;
 entry:
@@ -270,7 +270,7 @@ declare <2 x i128> @llvm.fshr.v4i128(<2 x i128>, <2 x i128>, <2 x i128>)
 
 define i128 @fshr_i128(i128 %a, i128 %b) {
 ; CHECK-LABEL: 'fshr_i128'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:8 Lat:8 SizeLat:8 for: %fshr = tail call i128 @llvm.fshr.i128(i128 %a, i128 %b, i128 9)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %fshr = tail call i128 @llvm.fshr.i128(i128 %a, i128 %b, i128 9)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %fshr
 ;
 entry:
diff --git a/llvm/test/Analysis/CostModel/AArch64/mul.ll b/llvm/test/Analysis/CostModel/AArch64/mul.ll
index 7f65ddea582eb..ed16494f81fb0 100644
--- a/llvm/test/Analysis/CostModel/AArch64/mul.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/mul.ll
@@ -43,7 +43,7 @@ define <16 x i8> @t4(<16 x i8> %a, <16 x i8> %b)  {
 
 define <32 x i8> @t5(<32 x i8> %a, <32 x i8> %b)  {
 ; CHECK-LABEL: 't5'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %1 = mul <32 x i8> %a, %b
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %1 = mul <32 x i8> %a, %b
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %1
 ;
   %1 = mul <32 x i8> %a, %b
@@ -79,7 +79,7 @@ define <8 x i16> @t8(<8 x i16> %a, <8 x i16> %b)  {
 
 define <16 x i16> @t9(<16 x i16> %a, <16 x i16> %b)  {
 ; CHECK-LABEL: 't9'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %1 = mul <16 x i16> %a, %b
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %1 = mul <16 x i16> %a, %b
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %1
 ;
   %1 = mul <16 x i16> %a, %b
@@ -106,7 +106,7 @@ define <4 x i32> @t11(<4 x i32> %a, <4 x i32> %b)  {
 
 define <8 x i32> @t12(<8 x i32> %a, <8 x i32> %b)  {
 ; CHECK-LABEL: 't12'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %1 = mul <8 x i32> %a, %b
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %1 = mul <8 x i32> %a, %b
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %1
 ;
   %1 = mul <8 x i32> %a, %b
@@ -115,7 +115,7 @@ define <8 x i32> @t12(<8 x i32> %a, <8 x i32> %b)  {
 
 define <2 x i64> @t13(<2 x i64> %a, <2 x i64> %b)  {
 ; CHECK-LABEL: 't13'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %1 = mul nsw <2 x i64> %a, %b
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:8 Lat:14 SizeLat:14 for: %1 = mul nsw <2 x i64> %a, %b
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %1
 ;
   %1 = mul nsw <2 x i64> %a, %b
@@ -124,7 +124,7 @@ define <2 x i64> @t13(<2 x i64> %a, <2 x i64> %b)  {
 
 define <4 x i64> @t14(<4 x i64> %a, <4 x i64> %b)  {
 ; CHECK-LABEL: 't14'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:1 Lat:1 SizeLat:1 for: %1 = mul nsw <4 x i64> %a, %b
+; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:16 Lat:28 SizeLat:28 for: %1 = mul nsw <4 x i64> %a, %b
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %1
 ;
   %1 = mul nsw <4 x i64> %a, %b
diff --git a/llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll
index c4236d2cf41b8..463f2c2731f88 100644
--- a/llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll
@@ -12,7 +12,7 @@ define void @strict_fp_reductions() {
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:22 Lat:44 SizeLat:28 for: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:5 Lat:10 SizeLat:6 for: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:10 Lat:20 SizeLat:12 for: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:8 Lat:16 SizeLat:8 for: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:8 Lat:44 SizeLat:44 for: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef)
@@ -26,10 +26,10 @@ define void @strict_fp_reductions() {
 
 define void @strict_fp_reductions_fp16() {
 ; CHECK-NOFP16-LABEL: 'strict_fp_reductions_fp16'
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:5 Lat:10 SizeLat:6 for: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:11 Lat:22 SizeLat:14 for: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:23 Lat:46 SizeLat:30 for: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:108 CodeSize:46 Lat:92 SizeLat:60 for: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:11 Lat:16 SizeLat:12 for: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:23 Lat:34 SizeLat:26 for: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:47 Lat:70 SizeLat:54 for: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:108 CodeSize:94 Lat:140 SizeLat:108 for: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
 ; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-F16-LABEL: 'strict_fp_reductions_fp16'
@@ -40,10 +40,10 @@ define void @strict_fp_reductions_fp16() {
 ; CHECK-F16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-BF16-LABEL: 'strict_fp_reductions_fp16'
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:5 Lat:10 SizeLat:6 for: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:11 Lat:22 SizeLat:14 for: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:23 Lat:46 SizeLat:30 for: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:108 CodeSize:46 Lat:92 SizeLat:60 for: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:11 Lat:16 SizeLat:12 for: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:23 Lat:34 SizeLat:26 for: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:47 Lat:70 SizeLat:54 for: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:108 CodeSize:94 Lat:140 SizeLat:108 for: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
 ; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef)
@@ -55,15 +55,15 @@ define void @strict_fp_reductions_fp16() {
 
 define void @strict_fp_reductions_bf16() {
 ; CHECK-NOFP16-LABEL: 'strict_fp_reductions_bf16'
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:11 Lat:22 SizeLat:14 for: %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:23 Lat:34 SizeLat:26 for: %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
 ; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-F16-LABEL: 'strict_fp_reductions_bf16'
-; CHECK-F16-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:11 Lat:22 SizeLat:14 for: %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
+; CHECK-F16-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:23 Lat:34 SizeLat:26 for: %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
 ; CHECK-F16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-BF16-LABEL: 'strict_fp_reductions_bf16'
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:11 Lat:22 SizeLat:14 for: %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:23 Lat:34 SizeLat:26 for: %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
 ; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4f8(bfloat 0.0, <4 x bfloat> undef)
@@ -86,7 +86,7 @@ define void @fast_fp_reductions() {
 ; CHECK-NEXT:  Cost Model: Found costs of 2 for: %fadd_v4f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %fadd_v7f64 = call fast double @llvm.vector.reduce.fadd.v7f64(double 0.000000e+00, <7 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 8 for: %fadd_v9f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v9f64(double 0.000000e+00, <9 x double> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:2 Lat:6 SizeLat:2 for: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:30 CodeSize:2 Lat:30 SizeLat:30 for: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %fadd_v2f32 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef)
@@ -117,16 +117,16 @@ define void @fast_fp_reductions() {
 
 define void @fast_fp_reductions_fp16() {
 ; CHECK-NOFP16-LABEL: 'fast_fp_reductions_fp16'
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:6 SizeLat:4 for: %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:6 SizeLat:4 for: %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:27 Lat:33 SizeLat:27 for: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:27 Lat:33 SizeLat:27 for: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:72 CodeSize:44 Lat:52 SizeLat:44 for: %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:72 CodeSize:44 Lat:52 SizeLat:44 for: %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:203 CodeSize:35 Lat:41 SizeLat:35 for: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef)
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:248 CodeSize:35 Lat:41 SizeLat:35 for: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef)
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:7 for: %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:7 for: %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:14 Lat:18 SizeLat:14 for: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:14 Lat:18 SizeLat:14 for: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:39 Lat:51 SizeLat:39 for: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:39 Lat:51 SizeLat:39 for: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:72 CodeSize:60 Lat:76 SizeLat:60 for: %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:72 CodeSize:60 Lat:76 SizeLat:60 for: %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:203 CodeSize:47 Lat:59 SizeLat:47 for: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef)
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:248 CodeSize:47 Lat:59 SizeLat:47 for: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef)
 ; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-F16-LABEL: 'fast_fp_reductions_fp16'
@@ -143,16 +143,16 @@ define void @fast_fp_reductions_fp16() {
 ; CHECK-F16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-BF16-LABEL: 'fast_fp_reductions_fp16'
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:6 SizeLat:4 for: %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:6 SizeLat:4 for: %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:27 Lat:33 SizeLat:27 for: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:27 Lat:33 SizeLat:27 for: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:72 CodeSize:44 Lat:52 SizeLat:44 for: %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:72 CodeSize:44 Lat:52 SizeLat:44 for: %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:203 CodeSize:35 Lat:41 SizeLat:35 for: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef)
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:248 CodeSize:35 Lat:41 SizeLat:35 for: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef)
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:7 for: %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:7 for: %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:14 Lat:18 SizeLat:14 for: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:14 Lat:18 SizeLat:14 for: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:39 Lat:51 SizeLat:39 for: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:39 Lat:51 SizeLat:39 for: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:72 CodeSize:60 Lat:76 SizeLat:60 for: %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:72 CodeSize:60 Lat:76 SizeLat:60 for: %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:203 CodeSize:47 Lat:59 SizeLat:47 for: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef)
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:248 CodeSize:47 Lat:59 SizeLat:47 for: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef)
 ; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef)
@@ -175,15 +175,15 @@ define void @fast_fp_reductions_fp16() {
 
 define void @fast_fp_reductions_bf16() {
 ; CHECK-NOFP16-LABEL: 'fast_fp_reductions_bf16'
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef)
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:14 Lat:18 SizeLat:14 for: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef)
 ; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-F16-LABEL: 'fast_fp_reductions_bf16'
-; CHECK-F16-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef)
+; CHECK-F16-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:14 Lat:18 SizeLat:14 for: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef)
 ; CHECK-F16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-BF16-LABEL: 'fast_fp_reductions_bf16'
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef)
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:14 Lat:18 SizeLat:14 for: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef)
 ; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4f8(bfloat -0.0, <4 x bfloat> undef)
diff --git a/llvm/test/Analysis/CostModel/AArch64/rem.ll b/llvm/test/Analysis/CostModel/AArch64/rem.ll
index 2fa62f1705911..02173ece24767 100644
--- a/llvm/test/Analysis/CostModel/AArch64/rem.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/rem.ll
@@ -5,7 +5,7 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define void @srem() {
 ; CHECK-LABEL: 'srem'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %I128 = srem i128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %I128 = srem i128 undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %I64 = srem i64 undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = srem <2 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = srem <4 x i64> undef, undef
@@ -63,7 +63,7 @@ define void @srem() {
 
 define void @urem() {
 ; CHECK-LABEL: 'urem'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %I128 = urem i128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %I128 = urem i128 undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %I64 = urem i64 undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = urem <2 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = urem <4 x i64> undef, undef
@@ -287,29 +287,29 @@ define void @urem_uniform() {
 
 define void @srem_const() {
 ; CHECK-LABEL: 'srem_const'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %I128 = srem i128 undef, 7
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %I128 = srem i128 undef, 7
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I64 = srem i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = srem <2 x i64> undef, <i64 6, i64 7>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = srem <4 x i64> undef, <i64 4, i64 5, i64 6, i64 7>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:30 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = srem <8 x i64> undef, <i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:11 Lat:14 SizeLat:14 for: %V2i64 = srem <2 x i64> undef, <i64 6, i64 7>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:19 Lat:22 SizeLat:22 for: %V4i64 = srem <4 x i64> undef, <i64 4, i64 5, i64 6, i64 7>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:30 CodeSize:35 Lat:38 SizeLat:38 for: %V8i64 = srem <8 x i64> undef, <i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11>
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I32 = srem i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = srem <2 x i32> undef, <i32 4, i32 5>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = srem <4 x i32> undef, <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:30 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = srem <8 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = srem <16 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:11 Lat:14 SizeLat:14 for: %V2i32 = srem <2 x i32> undef, <i32 4, i32 5>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:19 Lat:22 SizeLat:22 for: %V4i32 = srem <4 x i32> undef, <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:30 CodeSize:35 Lat:38 SizeLat:38 for: %V8i32 = srem <8 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:67 Lat:70 SizeLat:70 for: %V16i32 = srem <16 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I16 = srem i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = srem <2 x i16> undef, <i16 4, i16 5>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = srem <4 x i16> undef, <i16 4, i16 5, i16 6, i16 7>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:30 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = srem <8 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = srem <16 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:102 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = srem <32 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:11 Lat:14 SizeLat:14 for: %V2i16 = srem <2 x i16> undef, <i16 4, i16 5>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:19 Lat:22 SizeLat:22 for: %V4i16 = srem <4 x i16> undef, <i16 4, i16 5, i16 6, i16 7>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:30 CodeSize:35 Lat:38 SizeLat:38 for: %V8i16 = srem <8 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:67 Lat:70 SizeLat:70 for: %V16i16 = srem <16 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:102 CodeSize:131 Lat:134 SizeLat:134 for: %V32i16 = srem <32 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I8 = srem i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = srem <2 x i8> undef, <i8 4, i8 5>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = srem <4 x i8> undef, <i8 4, i8 5, i8 6, i8 7>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:30 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = srem <8 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = srem <16 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:102 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = srem <32 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:198 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = srem <64 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:11 Lat:14 SizeLat:14 for: %V2i8 = srem <2 x i8> undef, <i8 4, i8 5>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:19 Lat:22 SizeLat:22 for: %V4i8 = srem <4 x i8> undef, <i8 4, i8 5, i8 6, i8 7>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:30 CodeSize:35 Lat:38 SizeLat:38 for: %V8i8 = srem <8 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:67 Lat:70 SizeLat:70 for: %V16i8 = srem <16 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:102 CodeSize:131 Lat:134 SizeLat:134 for: %V32i8 = srem <32 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:198 CodeSize:259 Lat:262 SizeLat:262 for: %V64i8 = srem <64 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %I128 = srem i128 undef, 7
@@ -345,29 +345,29 @@ define void @srem_const() {
 
 define void @urem_const() {
 ; CHECK-LABEL: 'urem_const'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %I128 = urem i128 undef, 7
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %I64 = urem i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found costs of RThru:47 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = urem <2 x i64> undef, <i64 6, i64 7>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:94 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = urem <4 x i64> undef, <i64 4, i64 5, i64 6, i64 7>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:188 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = urem <8 x i64> undef, <i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %I32 = urem i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = urem <2 x i32> undef, <i32 4, i32 5>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = urem <4 x i32> undef, <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = urem <8 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = urem <16 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %I16 = urem i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = urem <2 x i16> undef, <i16 4, i16 5>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = urem <4 x i16> undef, <i16 4, i16 5, i16 6, i16 7>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = urem <8 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = urem <16 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = urem <32 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %I8 = urem i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = urem <2 x i8> undef, <i8 4, i8 5>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = urem <4 x i8> undef, <i8 4, i8 5, i8 6, i8 7>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = urem <8 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = urem <16 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = urem <32 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = urem <64 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
+; CHECK-NEXT:  Cost Model: Found costs of 18 for: %I128 = urem i128 undef, 7
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %I64 = urem i64 undef, 7
+; CHECK-NEXT:  Cost Model: Found costs of RThru:47 CodeSize:29 Lat:47 SizeLat:47 for: %V2i64 = urem <2 x i64> undef, <i64 6, i64 7>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:94 CodeSize:58 Lat:94 SizeLat:94 for: %V4i64 = urem <4 x i64> undef, <i64 4, i64 5, i64 6, i64 7>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:188 CodeSize:116 Lat:188 SizeLat:188 for: %V8i64 = urem <8 x i64> undef, <i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11>
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %I32 = urem i32 undef, 7
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V2i32 = urem <2 x i32> undef, <i32 4, i32 5>
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V4i32 = urem <4 x i32> undef, <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V8i32 = urem <8 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT:  Cost Model: Found costs of 32 for: %V16i32 = urem <16 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %I16 = urem i16 undef, 7
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V2i16 = urem <2 x i16> undef, <i16 4, i16 5>
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V4i16 = urem <4 x i16> undef, <i16 4, i16 5, i16 6, i16 7>
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V8i16 = urem <8 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V16i16 = urem <16 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
+; CHECK-NEXT:  Cost Model: Found costs of 32 for: %V32i16 = urem <32 x i16> undef, <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %I8 = urem i8 undef, 7
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V2i8 = urem <2 x i8> undef, <i8 4, i8 5>
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V4i8 = urem <4 x i8> undef, <i8 4, i8 5, i8 6, i8 7>
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V8i8 = urem <8 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11>
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V16i8 = urem <16 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V32i8 = urem <32 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
+; CHECK-NEXT:  Cost Model: Found costs of 32 for: %V64i8 = urem <64 x i8> undef, <i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 
@@ -404,29 +404,29 @@ define void @urem_const() {
 
 define void @srem_uniformconst() {
 ; CHECK-LABEL: 'srem_uniformconst'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %I128 = srem i128 undef, 7
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %I128 = srem i128 undef, 7
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I64 = srem i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = srem <2 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = srem <4 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = srem <8 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V2i64 = srem <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V4i64 = srem <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of 32 for: %V8i64 = srem <8 x i64> undef, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I32 = srem i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = srem <2 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = srem <4 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = srem <8 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = srem <16 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V2i32 = srem <2 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V4i32 = srem <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V8i32 = srem <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %V16i32 = srem <16 x i32> undef, splat (i32 7)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I16 = srem i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = srem <2 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = srem <4 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = srem <8 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = srem <16 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = srem <32 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V2i16 = srem <2 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V4i16 = srem <4 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V8i16 = srem <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V16i16 = srem <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %V32i16 = srem <32 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I8 = srem i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = srem <2 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = srem <4 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = srem <8 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = srem <16 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = srem <32 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = srem <64 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V2i8 = srem <2 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V4i8 = srem <4 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V8i8 = srem <8 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V16i8 = srem <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V32i8 = srem <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %V64i8 = srem <64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %I128 = srem i128 undef, 7
@@ -462,29 +462,29 @@ define void @srem_uniformconst() {
 
 define void @urem_uniformconst() {
 ; CHECK-LABEL: 'urem_uniformconst'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %I128 = urem i128 undef, 7
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %I64 = urem i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found costs of RThru:47 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = urem <2 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:94 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = urem <4 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:188 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = urem <8 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %I32 = urem i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = urem <2 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = urem <4 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = urem <8 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = urem <16 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %I16 = urem i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = urem <2 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = urem <4 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = urem <8 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = urem <16 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = urem <32 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %I8 = urem i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = urem <2 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = urem <4 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = urem <8 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = urem <16 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = urem <32 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = urem <64 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 18 for: %I128 = urem i128 undef, 7
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %I64 = urem i64 undef, 7
+; CHECK-NEXT:  Cost Model: Found costs of RThru:47 CodeSize:29 Lat:47 SizeLat:47 for: %V2i64 = urem <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:94 CodeSize:58 Lat:94 SizeLat:94 for: %V4i64 = urem <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:188 CodeSize:116 Lat:188 SizeLat:188 for: %V8i64 = urem <8 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %I32 = urem i32 undef, 7
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V2i32 = urem <2 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V4i32 = urem <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V8i32 = urem <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 32 for: %V16i32 = urem <16 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %I16 = urem i16 undef, 7
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V2i16 = urem <2 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V4i16 = urem <4 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V8i16 = urem <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V16i16 = urem <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 32 for: %V32i16 = urem <32 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %I8 = urem i8 undef, 7
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V2i8 = urem <2 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V4i8 = urem <4 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V8i8 = urem <8 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V16i8 = urem <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V32i8 = urem <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 32 for: %V64i8 = urem <64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %I128 = urem i128 undef, 7
@@ -520,29 +520,29 @@ define void @urem_uniformconst() {
 
 define void @srem_constpow2() {
 ; CHECK-LABEL: 'srem_constpow2'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %I128 = srem i128 undef, 16
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %I128 = srem i128 undef, 16
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I64 = srem i64 undef, 16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = srem <2 x i64> undef, <i64 8, i64 16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = srem <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:30 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = srem <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:11 Lat:14 SizeLat:14 for: %V2i64 = srem <2 x i64> undef, <i64 8, i64 16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:19 Lat:22 SizeLat:22 for: %V4i64 = srem <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:30 CodeSize:35 Lat:38 SizeLat:38 for: %V8i64 = srem <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I32 = srem i32 undef, 16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = srem <2 x i32> undef, <i32 2, i32 4>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = srem <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:30 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = srem <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = srem <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:11 Lat:14 SizeLat:14 for: %V2i32 = srem <2 x i32> undef, <i32 2, i32 4>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:19 Lat:22 SizeLat:22 for: %V4i32 = srem <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:30 CodeSize:35 Lat:38 SizeLat:38 for: %V8i32 = srem <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:67 Lat:70 SizeLat:70 for: %V16i32 = srem <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I16 = srem i16 undef, 16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = srem <2 x i16> undef, <i16 2, i16 4>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = srem <4 x i16> undef, <i16 2, i16 4, i16 8, i16 16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:30 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = srem <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = srem <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:102 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = srem <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:11 Lat:14 SizeLat:14 for: %V2i16 = srem <2 x i16> undef, <i16 2, i16 4>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:19 Lat:22 SizeLat:22 for: %V4i16 = srem <4 x i16> undef, <i16 2, i16 4, i16 8, i16 16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:30 CodeSize:35 Lat:38 SizeLat:38 for: %V8i16 = srem <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:67 Lat:70 SizeLat:70 for: %V16i16 = srem <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:102 CodeSize:131 Lat:134 SizeLat:134 for: %V32i16 = srem <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I8 = srem i8 undef, 16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = srem <2 x i8> undef, <i8 2, i8 4>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = srem <4 x i8> undef, <i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:30 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = srem <8 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = srem <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:102 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = srem <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:198 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = srem <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:11 Lat:14 SizeLat:14 for: %V2i8 = srem <2 x i8> undef, <i8 2, i8 4>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:19 Lat:22 SizeLat:22 for: %V4i8 = srem <4 x i8> undef, <i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:30 CodeSize:35 Lat:38 SizeLat:38 for: %V8i8 = srem <8 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:67 Lat:70 SizeLat:70 for: %V16i8 = srem <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:102 CodeSize:131 Lat:134 SizeLat:134 for: %V32i8 = srem <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:198 CodeSize:259 Lat:262 SizeLat:262 for: %V64i8 = srem <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %I128 = srem i128 undef, 16
@@ -578,28 +578,28 @@ define void @srem_constpow2() {
 
 define void @urem_constpow2() {
 ; CHECK-LABEL: 'urem_constpow2'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %I128 = urem i128 undef, 16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %I64 = urem i64 undef, 16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = urem <2 x i64> undef, <i64 8, i64 16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = urem <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %I128 = urem i128 undef, 16
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I64 = urem i64 undef, 16
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2i64 = urem <2 x i64> undef, <i64 8, i64 16>
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V4i64 = urem <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8i64 = urem <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %I32 = urem i32 undef, 16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = urem <2 x i32> undef, <i32 2, i32 4>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = urem <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = urem <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I32 = urem i32 undef, 16
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2i32 = urem <2 x i32> undef, <i32 2, i32 4>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4i32 = urem <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V8i32 = urem <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V16i32 = urem <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %I16 = urem i16 undef, 16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = urem <2 x i16> undef, <i16 2, i16 4>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = urem <4 x i16> undef, <i16 2, i16 4, i16 8, i16 16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = urem <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = urem <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I16 = urem i16 undef, 16
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2i16 = urem <2 x i16> undef, <i16 2, i16 4>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4i16 = urem <4 x i16> undef, <i16 2, i16 4, i16 8, i16 16>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8i16 = urem <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V16i16 = urem <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V32i16 = urem <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %I8 = urem i8 undef, 16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = urem <2 x i8> undef, <i8 2, i8 4>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = urem <4 x i8> undef, <i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = urem <8 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = urem <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = urem <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I8 = urem i8 undef, 16
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2i8 = urem <2 x i8> undef, <i8 2, i8 4>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4i8 = urem <4 x i8> undef, <i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8i8 = urem <8 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V16i8 = urem <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V32i8 = urem <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V64i8 = urem <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
@@ -636,29 +636,29 @@ define void @urem_constpow2() {
 
 define void @srem_uniformconstpow2() {
 ; CHECK-LABEL: 'srem_uniformconstpow2'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %I128 = srem i128 undef, 16
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %I128 = srem i128 undef, 16
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I64 = srem i64 undef, 16
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2i64 = srem <2 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = srem <4 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = srem <8 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I32 = srem i32 undef, 16
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2i32 = srem <2 x i32> undef, splat (i32 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4i32 = srem <4 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = srem <8 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = srem <16 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I16 = srem i16 undef, 16
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2i16 = srem <2 x i16> undef, splat (i16 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4i16 = srem <4 x i16> undef, splat (i16 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8i16 = srem <8 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = srem <16 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = srem <32 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I8 = srem i8 undef, 16
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2i8 = srem <2 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4i8 = srem <4 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8i8 = srem <8 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V16i8 = srem <16 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = srem <32 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = srem <64 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %I128 = srem i128 undef, 16
@@ -694,28 +694,28 @@ define void @srem_uniformconstpow2() {
 
 define void @urem_uniformconstpow2() {
 ; CHECK-LABEL: 'urem_uniformconstpow2'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %I128 = urem i128 undef, 16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %I64 = urem i64 undef, 16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = urem <2 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = urem <4 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %I128 = urem i128 undef, 16
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I64 = urem i64 undef, 16
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2i64 = urem <2 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V4i64 = urem <4 x i64> undef, splat (i64 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8i64 = urem <8 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %I32 = urem i32 undef, 16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = urem <2 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = urem <4 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = urem <8 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I32 = urem i32 undef, 16
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2i32 = urem <2 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4i32 = urem <4 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V8i32 = urem <8 x i32> undef, splat (i32 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V16i32 = urem <16 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %I16 = urem i16 undef, 16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = urem <2 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = urem <4 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = urem <8 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = urem <16 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I16 = urem i16 undef, 16
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2i16 = urem <2 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4i16 = urem <4 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8i16 = urem <8 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V16i16 = urem <16 x i16> undef, splat (i16 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V32i16 = urem <32 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %I8 = urem i8 undef, 16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = urem <2 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = urem <4 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = urem <8 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = urem <16 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = urem <32 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I8 = urem i8 undef, 16
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2i8 = urem <2 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4i8 = urem <4 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8i8 = urem <8 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V16i8 = urem <16 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V32i8 = urem <32 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V64i8 = urem <64 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
@@ -752,29 +752,29 @@ define void @urem_uniformconstpow2() {
 
 define void @srem_constnegpow2() {
 ; CHECK-LABEL: 'srem_constnegpow2'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %I128 = srem i128 undef, -16
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %I128 = srem i128 undef, -16
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I64 = srem i64 undef, -16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = srem <2 x i64> undef, <i64 -8, i64 -16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = srem <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:30 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = srem <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:11 Lat:14 SizeLat:14 for: %V2i64 = srem <2 x i64> undef, <i64 -8, i64 -16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:19 Lat:22 SizeLat:22 for: %V4i64 = srem <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:30 CodeSize:35 Lat:38 SizeLat:38 for: %V8i64 = srem <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I32 = srem i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = srem <2 x i32> undef, <i32 -2, i32 -4>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = srem <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:30 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = srem <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = srem <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:11 Lat:14 SizeLat:14 for: %V2i32 = srem <2 x i32> undef, <i32 -2, i32 -4>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:19 Lat:22 SizeLat:22 for: %V4i32 = srem <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:30 CodeSize:35 Lat:38 SizeLat:38 for: %V8i32 = srem <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:67 Lat:70 SizeLat:70 for: %V16i32 = srem <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I16 = srem i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = srem <2 x i16> undef, <i16 -2, i16 -4>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = srem <4 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:30 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = srem <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = srem <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:102 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = srem <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:11 Lat:14 SizeLat:14 for: %V2i16 = srem <2 x i16> undef, <i16 -2, i16 -4>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:19 Lat:22 SizeLat:22 for: %V4i16 = srem <4 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:30 CodeSize:35 Lat:38 SizeLat:38 for: %V8i16 = srem <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:67 Lat:70 SizeLat:70 for: %V16i16 = srem <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:102 CodeSize:131 Lat:134 SizeLat:134 for: %V32i16 = srem <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I8 = srem i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = srem <2 x i8> undef, <i8 -2, i8 -4>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = srem <4 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:30 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = srem <8 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = srem <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:102 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = srem <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:198 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = srem <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:11 Lat:14 SizeLat:14 for: %V2i8 = srem <2 x i8> undef, <i8 -2, i8 -4>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:19 Lat:22 SizeLat:22 for: %V4i8 = srem <4 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:30 CodeSize:35 Lat:38 SizeLat:38 for: %V8i8 = srem <8 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:67 Lat:70 SizeLat:70 for: %V16i8 = srem <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:102 CodeSize:131 Lat:134 SizeLat:134 for: %V32i8 = srem <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:198 CodeSize:259 Lat:262 SizeLat:262 for: %V64i8 = srem <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %I128 = srem i128 undef, -16
@@ -810,29 +810,29 @@ define void @srem_constnegpow2() {
 
 define void @urem_constnegpow2() {
 ; CHECK-LABEL: 'urem_constnegpow2'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %I128 = urem i128 undef, -16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %I64 = urem i64 undef, -16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:47 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = urem <2 x i64> undef, <i64 -8, i64 -16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:94 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = urem <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:188 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = urem <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %I32 = urem i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = urem <2 x i32> undef, <i32 -2, i32 -4>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = urem <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = urem <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = urem <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %I16 = urem i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = urem <2 x i16> undef, <i16 -2, i16 -4>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = urem <4 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = urem <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = urem <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = urem <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %I8 = urem i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = urem <2 x i8> undef, <i8 -2, i8 -4>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = urem <4 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = urem <8 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = urem <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = urem <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = urem <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found costs of 18 for: %I128 = urem i128 undef, -16
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %I64 = urem i64 undef, -16
+; CHECK-NEXT:  Cost Model: Found costs of RThru:47 CodeSize:29 Lat:47 SizeLat:47 for: %V2i64 = urem <2 x i64> undef, <i64 -8, i64 -16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:94 CodeSize:58 Lat:94 SizeLat:94 for: %V4i64 = urem <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:188 CodeSize:116 Lat:188 SizeLat:188 for: %V8i64 = urem <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %I32 = urem i32 undef, -16
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V2i32 = urem <2 x i32> undef, <i32 -2, i32 -4>
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V4i32 = urem <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V8i32 = urem <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; CHECK-NEXT:  Cost Model: Found costs of 32 for: %V16i32 = urem <16 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256, i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %I16 = urem i16 undef, -16
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V2i16 = urem <2 x i16> undef, <i16 -2, i16 -4>
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V4i16 = urem <4 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16>
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V8i16 = urem <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V16i16 = urem <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; CHECK-NEXT:  Cost Model: Found costs of 32 for: %V32i16 = urem <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %I8 = urem i8 undef, -16
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V2i8 = urem <2 x i8> undef, <i8 -2, i8 -4>
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V4i8 = urem <4 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V8i8 = urem <8 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V16i8 = urem <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V32i8 = urem <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
+; CHECK-NEXT:  Cost Model: Found costs of 32 for: %V64i8 = urem <64 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %I128 = urem i128 undef, -16
@@ -868,29 +868,29 @@ define void @urem_constnegpow2() {
 
 define void @srem_uniformconstnegpow2() {
 ; CHECK-LABEL: 'srem_uniformconstnegpow2'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %I128 = srem i128 undef, -16
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %I128 = srem i128 undef, -16
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I64 = srem i64 undef, -16
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2i64 = srem <2 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = srem <4 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = srem <8 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V4i64 = srem <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V8i64 = srem <8 x i64> undef, splat (i64 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I32 = srem i32 undef, -16
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2i32 = srem <2 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4i32 = srem <4 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = srem <8 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = srem <16 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V8i32 = srem <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V16i32 = srem <16 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I16 = srem i16 undef, -16
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2i16 = srem <2 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4i16 = srem <4 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8i16 = srem <8 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = srem <16 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = srem <32 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V16i16 = srem <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V32i16 = srem <32 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I8 = srem i8 undef, -16
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2i8 = srem <2 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4i8 = srem <4 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8i8 = srem <8 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V16i8 = srem <16 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = srem <32 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = srem <64 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V32i8 = srem <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V64i8 = srem <64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %I128 = srem i128 undef, -16
@@ -926,29 +926,29 @@ define void @srem_uniformconstnegpow2() {
 
 define void @urem_uniformconstnegpow2() {
 ; CHECK-LABEL: 'urem_uniformconstnegpow2'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %I128 = urem i128 undef, -16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %I64 = urem i64 undef, -16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:47 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = urem <2 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:94 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = urem <4 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:188 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = urem <8 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %I32 = urem i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = urem <2 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = urem <4 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = urem <8 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = urem <16 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %I16 = urem i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = urem <2 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = urem <4 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = urem <8 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = urem <16 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = urem <32 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %I8 = urem i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = urem <2 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = urem <4 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = urem <8 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = urem <16 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = urem <32 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = urem <64 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 18 for: %I128 = urem i128 undef, -16
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %I64 = urem i64 undef, -16
+; CHECK-NEXT:  Cost Model: Found costs of RThru:47 CodeSize:29 Lat:47 SizeLat:47 for: %V2i64 = urem <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:94 CodeSize:58 Lat:94 SizeLat:94 for: %V4i64 = urem <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:188 CodeSize:116 Lat:188 SizeLat:188 for: %V8i64 = urem <8 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %I32 = urem i32 undef, -16
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V2i32 = urem <2 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V4i32 = urem <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V8i32 = urem <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 32 for: %V16i32 = urem <16 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %I16 = urem i16 undef, -16
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V2i16 = urem <2 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V4i16 = urem <4 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V8i16 = urem <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V16i16 = urem <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 32 for: %V32i16 = urem <32 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %I8 = urem i8 undef, -16
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V2i8 = urem <2 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V4i8 = urem <4 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V8i8 = urem <8 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V16i8 = urem <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V32i8 = urem <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 32 for: %V64i8 = urem <64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %I128 = urem i128 undef, -16
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-arith-fp.ll b/llvm/test/Analysis/CostModel/AArch64/sve-arith-fp.ll
index 1c40354892191..c7f8b48c8707e 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-arith-fp.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-arith-fp.ll
@@ -7,13 +7,13 @@ define void @fadd() {
 ; CHECK-LABEL: 'fadd'
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fadd <vscale x 4 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fadd <vscale x 8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fadd <vscale x 16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V16F16 = fadd <vscale x 16 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of Invalid for: %V1F32 = fadd <vscale x 1 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = fadd <vscale x 2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = fadd <vscale x 4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = fadd <vscale x 8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V8F32 = fadd <vscale x 8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fadd <vscale x 2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fadd <vscale x 4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V4F64 = fadd <vscale x 4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %V4F16 = fadd <vscale x 4 x half> undef, undef
@@ -35,13 +35,13 @@ define void @fsub() {
 ; CHECK-LABEL: 'fsub'
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fsub <vscale x 4 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fsub <vscale x 8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fsub <vscale x 16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V16F16 = fsub <vscale x 16 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of Invalid for: %V1F32 = fsub <vscale x 1 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = fsub <vscale x 2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = fsub <vscale x 4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = fsub <vscale x 8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V8F32 = fsub <vscale x 8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fsub <vscale x 2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fsub <vscale x 4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V4F64 = fsub <vscale x 4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %V4F16 = fsub <vscale x 4 x half> undef, undef
@@ -64,12 +64,12 @@ define void @fneg() {
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16 = fneg <vscale x 2 x half> undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fneg <vscale x 4 x half> undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fneg <vscale x 8 x half> undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fneg <vscale x 16 x half> undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V16F16 = fneg <vscale x 16 x half> undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = fneg <vscale x 2 x float> undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = fneg <vscale x 4 x float> undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = fneg <vscale x 8 x float> undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V8F32 = fneg <vscale x 8 x float> undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fneg <vscale x 2 x double> undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fneg <vscale x 4 x double> undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V4F64 = fneg <vscale x 4 x double> undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %V2F16 = fneg <vscale x 2 x half> undef
@@ -91,12 +91,12 @@ define void @fmul() {
 ; CHECK-LABEL: 'fmul'
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fmul <vscale x 4 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fmul <vscale x 8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fmul <vscale x 16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %V16F16 = fmul <vscale x 16 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = fmul <vscale x 2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = fmul <vscale x 4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = fmul <vscale x 8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %V8F32 = fmul <vscale x 8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fmul <vscale x 2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fmul <vscale x 4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %V4F64 = fmul <vscale x 4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %V4F16 = fmul <vscale x 4 x half> undef, undef
@@ -115,14 +115,14 @@ define void @fmul() {
 
 define void @fdiv() {
 ; CHECK-LABEL: 'fdiv'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V4F16 = fdiv <vscale x 4 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V8F16 = fdiv <vscale x 8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V16F16 = fdiv <vscale x 16 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V2F32 = fdiv <vscale x 2 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = fdiv <vscale x 4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8F32 = fdiv <vscale x 8 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = fdiv <vscale x 2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4F64 = fdiv <vscale x 4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %V4F16 = fdiv <vscale x 4 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %V8F16 = fdiv <vscale x 8 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:2 Lat:8 SizeLat:8 for: %V16F16 = fdiv <vscale x 16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %V2F32 = fdiv <vscale x 2 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %V4F32 = fdiv <vscale x 4 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:2 Lat:8 SizeLat:8 for: %V8F32 = fdiv <vscale x 8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %V2F64 = fdiv <vscale x 2 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:2 Lat:8 SizeLat:8 for: %V4F64 = fdiv <vscale x 4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %V4F16 = fdiv <vscale x 4 x half> undef, undef
@@ -169,12 +169,12 @@ define void @fma() {
 ; CHECK-LABEL: 'fma'
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = call <vscale x 16 x half> @llvm.fma.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %V16F16 = call <vscale x 16 x half> @llvm.fma.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = call <vscale x 8 x float> @llvm.fma.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %V8F32 = call <vscale x 8 x float> @llvm.fma.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = call <vscale x 4 x double> @llvm.fma.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %V4F64 = call <vscale x 4 x double> @llvm.fma.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %V4F16 = call <vscale x 4 x half> @llvm.fma.v4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef)
@@ -195,12 +195,12 @@ define void @fmuladd() {
 ; CHECK-LABEL: 'fmuladd'
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = call <vscale x 4 x half> @llvm.fmuladd.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = call <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = call <vscale x 16 x half> @llvm.fmuladd.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %V16F16 = call <vscale x 16 x half> @llvm.fmuladd.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = call <vscale x 2 x float> @llvm.fmuladd.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = call <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %V8F32 = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = call <vscale x 2 x double> @llvm.fmuladd.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = call <vscale x 4 x double> @llvm.fmuladd.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %V4F64 = call <vscale x 4 x double> @llvm.fmuladd.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %V4F16 = call <vscale x 4 x half> @llvm.fmuladd.v4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef)
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-arith.ll b/llvm/test/Analysis/CostModel/AArch64/sve-arith.ll
index 75af1df08594f..0d72ad974907a 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-arith.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-arith.ll
@@ -5,8 +5,8 @@ target triple = "aarch64-unknown-linux-gnu"
 
 define void @scalable_sdiv() #0 {
 ; CHECK-LABEL: 'scalable_sdiv'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %sdiv_nxv16i8 = sdiv <vscale x 16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %sdiv_nxv8i16 = sdiv <vscale x 8 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:32 Lat:32 SizeLat:32 for: %sdiv_nxv16i8 = sdiv <vscale x 16 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:16 Lat:16 SizeLat:16 for: %sdiv_nxv8i16 = sdiv <vscale x 8 x i16> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %sdiv_nxv4i32 = sdiv <vscale x 4 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %sdiv_nxv2i64 = sdiv <vscale x 2 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
@@ -22,8 +22,8 @@ entry:
 
 define void @scalable_udiv() #0 {
 ; CHECK-LABEL: 'scalable_udiv'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %udiv_nxv16i8 = udiv <vscale x 16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %udiv_nxv8i16 = udiv <vscale x 8 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:32 Lat:32 SizeLat:32 for: %udiv_nxv16i8 = udiv <vscale x 16 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:16 Lat:16 SizeLat:16 for: %udiv_nxv8i16 = udiv <vscale x 8 x i16> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %udiv_nxv4i32 = udiv <vscale x 4 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %udiv_nxv2i64 = udiv <vscale x 2 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-div.ll b/llvm/test/Analysis/CostModel/AArch64/sve-div.ll
index 84a9ca0fab6d8..3b1d1f3ec9a5b 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-div.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-div.ll
@@ -8,23 +8,23 @@ define void @sdiv() {
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = sdiv <2 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4i64 = sdiv <4 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = sdiv <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = sdiv <2 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2i32 = sdiv <2 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = sdiv <4 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V6i32 = sdiv <6 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8i32 = sdiv <8 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = sdiv <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = sdiv <2 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = sdiv <4 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = sdiv <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = sdiv <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = sdiv <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = sdiv <2 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = sdiv <4 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V6i8 = sdiv <6 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = sdiv <8 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = sdiv <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = sdiv <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = sdiv <64 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V2i16 = sdiv <2 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V4i16 = sdiv <4 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:16 Lat:16 SizeLat:16 for: %V8i16 = sdiv <8 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V16i16 = sdiv <16 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:16 SizeLat:16 for: %V32i16 = sdiv <32 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V2i8 = sdiv <2 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V4i8 = sdiv <4 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:32 Lat:32 SizeLat:32 for: %V6i8 = sdiv <6 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V8i8 = sdiv <8 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:32 Lat:32 SizeLat:32 for: %V16i8 = sdiv <16 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 32 for: %V32i8 = sdiv <32 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:32 SizeLat:32 for: %V64i8 = sdiv <64 x i8> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i64 = sdiv <vscale x 2 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV4i64 = sdiv <vscale x 4 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i64 = sdiv <vscale x 8 x i64> undef, undef
@@ -34,15 +34,15 @@ define void @sdiv() {
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i32 = sdiv <vscale x 16 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i16 = sdiv <vscale x 2 x i16> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i16 = sdiv <vscale x 4 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i16 = sdiv <vscale x 8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i16 = sdiv <vscale x 16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %NV32i16 = sdiv <vscale x 32 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:16 Lat:16 SizeLat:16 for: %NV8i16 = sdiv <vscale x 8 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %NV16i16 = sdiv <vscale x 16 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:16 SizeLat:16 for: %NV32i16 = sdiv <vscale x 32 x i16> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i8 = sdiv <vscale x 2 x i8> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i8 = sdiv <vscale x 4 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i8 = sdiv <vscale x 8 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i8 = sdiv <vscale x 16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %NV32i8 = sdiv <vscale x 32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:4 Lat:4 SizeLat:4 for: %NV64i8 = sdiv <vscale x 64 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:16 Lat:16 SizeLat:16 for: %NV8i8 = sdiv <vscale x 8 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:32 Lat:32 SizeLat:32 for: %NV16i8 = sdiv <vscale x 16 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 32 for: %NV32i8 = sdiv <vscale x 32 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:32 SizeLat:32 for: %NV64i8 = sdiv <vscale x 64 x i8> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %V2i64 = sdiv <2 x i64> undef, undef
@@ -91,23 +91,23 @@ define void @udiv() {
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = udiv <2 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4i64 = udiv <4 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = udiv <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = udiv <2 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2i32 = udiv <2 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = udiv <4 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V6i32 = udiv <6 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8i32 = udiv <8 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = udiv <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = udiv <2 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = udiv <4 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = udiv <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = udiv <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = udiv <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = udiv <2 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = udiv <4 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V6i8 = udiv <6 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = udiv <8 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = udiv <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = udiv <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = udiv <64 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V2i16 = udiv <2 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V4i16 = udiv <4 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:16 Lat:16 SizeLat:16 for: %V8i16 = udiv <8 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V16i16 = udiv <16 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:16 SizeLat:16 for: %V32i16 = udiv <32 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V2i8 = udiv <2 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V4i8 = udiv <4 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:32 Lat:32 SizeLat:32 for: %V6i8 = udiv <6 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V8i8 = udiv <8 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:32 Lat:32 SizeLat:32 for: %V16i8 = udiv <16 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 32 for: %V32i8 = udiv <32 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:32 SizeLat:32 for: %V64i8 = udiv <64 x i8> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i64 = udiv <vscale x 2 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV4i64 = udiv <vscale x 4 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i64 = udiv <vscale x 8 x i64> undef, undef
@@ -117,15 +117,15 @@ define void @udiv() {
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i32 = udiv <vscale x 16 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i16 = udiv <vscale x 2 x i16> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i16 = udiv <vscale x 4 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i16 = udiv <vscale x 8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i16 = udiv <vscale x 16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %NV32i16 = udiv <vscale x 32 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:16 Lat:16 SizeLat:16 for: %NV8i16 = udiv <vscale x 8 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %NV16i16 = udiv <vscale x 16 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:16 SizeLat:16 for: %NV32i16 = udiv <vscale x 32 x i16> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i8 = udiv <vscale x 2 x i8> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i8 = udiv <vscale x 4 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i8 = udiv <vscale x 8 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i8 = udiv <vscale x 16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %NV32i8 = udiv <vscale x 32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:4 Lat:4 SizeLat:4 for: %NV64i8 = udiv <vscale x 64 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:16 Lat:16 SizeLat:16 for: %NV8i8 = udiv <vscale x 8 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:32 Lat:32 SizeLat:32 for: %NV16i8 = udiv <vscale x 16 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 32 for: %NV32i8 = udiv <vscale x 32 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:32 SizeLat:32 for: %NV64i8 = udiv <vscale x 64 x i8> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %V2i64 = udiv <2 x i64> undef, undef
@@ -171,42 +171,42 @@ define void @udiv() {
 
 define void @sdiv_uniformconst() {
 ; CHECK-LABEL: 'sdiv_uniformconst'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = sdiv <2 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = sdiv <4 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = sdiv <8 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = sdiv <2 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = sdiv <4 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = sdiv <8 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = sdiv <16 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = sdiv <2 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = sdiv <4 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = sdiv <8 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = sdiv <16 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = sdiv <32 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = sdiv <2 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = sdiv <4 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = sdiv <8 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = sdiv <16 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = sdiv <32 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = sdiv <64 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i64 = sdiv <vscale x 2 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i64 = sdiv <vscale x 4 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i64 = sdiv <vscale x 8 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i32 = sdiv <vscale x 2 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i32 = sdiv <vscale x 4 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i32 = sdiv <vscale x 8 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i32 = sdiv <vscale x 16 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i16 = sdiv <vscale x 2 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i16 = sdiv <vscale x 4 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i16 = sdiv <vscale x 8 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i16 = sdiv <vscale x 16 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %NV32i16 = sdiv <vscale x 32 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i8 = sdiv <vscale x 2 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i8 = sdiv <vscale x 4 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i8 = sdiv <vscale x 8 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i8 = sdiv <vscale x 16 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %NV32i8 = sdiv <vscale x 32 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %NV64i8 = sdiv <vscale x 64 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V2i64 = sdiv <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V4i64 = sdiv <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of 32 for: %V8i64 = sdiv <8 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V2i32 = sdiv <2 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V4i32 = sdiv <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V8i32 = sdiv <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %V16i32 = sdiv <16 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V2i16 = sdiv <2 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V4i16 = sdiv <4 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V8i16 = sdiv <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V16i16 = sdiv <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %V32i16 = sdiv <32 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V2i8 = sdiv <2 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V4i8 = sdiv <4 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V8i8 = sdiv <8 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V16i8 = sdiv <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V32i8 = sdiv <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %V64i8 = sdiv <64 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %NV2i64 = sdiv <vscale x 2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %NV4i64 = sdiv <vscale x 4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of 20 for: %NV8i64 = sdiv <vscale x 8 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %NV2i32 = sdiv <vscale x 2 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %NV4i32 = sdiv <vscale x 4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %NV8i32 = sdiv <vscale x 8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 20 for: %NV16i32 = sdiv <vscale x 16 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %NV2i16 = sdiv <vscale x 2 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %NV4i16 = sdiv <vscale x 4 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %NV8i16 = sdiv <vscale x 8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %NV16i16 = sdiv <vscale x 16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 20 for: %NV32i16 = sdiv <vscale x 32 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %NV2i8 = sdiv <vscale x 2 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %NV4i8 = sdiv <vscale x 4 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %NV8i8 = sdiv <vscale x 8 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %NV16i8 = sdiv <vscale x 16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %NV32i8 = sdiv <vscale x 32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 20 for: %NV64i8 = sdiv <vscale x 64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %V2i64 = sdiv <2 x i64> undef, splat (i64 7)
@@ -250,42 +250,42 @@ define void @sdiv_uniformconst() {
 
 define void @udiv_uniformconst() {
 ; CHECK-LABEL: 'udiv_uniformconst'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = udiv <2 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = udiv <4 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = udiv <8 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = udiv <2 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = udiv <4 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = udiv <8 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = udiv <16 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = udiv <2 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = udiv <4 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = udiv <8 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = udiv <16 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = udiv <32 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = udiv <2 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = udiv <4 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = udiv <8 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = udiv <16 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = udiv <32 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = udiv <64 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V2i64 = udiv <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V4i64 = udiv <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %V8i64 = udiv <8 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V2i32 = udiv <2 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V4i32 = udiv <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V8i32 = udiv <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %V16i32 = udiv <16 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V2i16 = udiv <2 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V4i16 = udiv <4 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V8i16 = udiv <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V16i16 = udiv <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %V32i16 = udiv <32 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V2i8 = udiv <2 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V4i8 = udiv <4 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V8i8 = udiv <8 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V16i8 = udiv <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V32i8 = udiv <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %V64i8 = udiv <64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV2i64 = udiv <vscale x 2 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i64 = udiv <vscale x 4 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i64 = udiv <vscale x 8 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %NV4i64 = udiv <vscale x 4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %NV8i64 = udiv <vscale x 8 x i64> undef, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV2i32 = udiv <vscale x 2 x i32> undef, splat (i32 7)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV4i32 = udiv <vscale x 4 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i32 = udiv <vscale x 8 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i32 = udiv <vscale x 16 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %NV8i32 = udiv <vscale x 8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %NV16i32 = udiv <vscale x 16 x i32> undef, splat (i32 7)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV2i16 = udiv <vscale x 2 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV4i16 = udiv <vscale x 4 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV8i16 = udiv <vscale x 8 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i16 = udiv <vscale x 16 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %NV32i16 = udiv <vscale x 32 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %NV16i16 = udiv <vscale x 16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %NV32i16 = udiv <vscale x 32 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV2i8 = udiv <vscale x 2 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV4i8 = udiv <vscale x 4 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV8i8 = udiv <vscale x 8 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV16i8 = udiv <vscale x 16 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %NV32i8 = udiv <vscale x 32 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %NV64i8 = udiv <vscale x 64 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %NV32i8 = udiv <vscale x 32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %NV64i8 = udiv <vscale x 64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %V2i64 = udiv <2 x i64> undef, splat (i64 7)
@@ -329,42 +329,42 @@ define void @udiv_uniformconst() {
 
 define void @sdiv_uniformconstpow2() {
 ; CHECK-LABEL: 'sdiv_uniformconstpow2'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found costs of 3 for: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2i32 = sdiv <2 x i32> undef, splat (i32 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2i16 = sdiv <2 x i16> undef, splat (i16 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4i16 = sdiv <4 x i16> undef, splat (i16 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2i8 = sdiv <2 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4i8 = sdiv <4 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8i8 = sdiv <8 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i64 = sdiv <vscale x 2 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV2i64 = sdiv <vscale x 2 x i64> undef, splat (i64 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV4i64 = sdiv <vscale x 4 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i64 = sdiv <vscale x 8 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i32 = sdiv <vscale x 2 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i32 = sdiv <vscale x 4 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %NV8i64 = sdiv <vscale x 8 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV2i32 = sdiv <vscale x 2 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV4i32 = sdiv <vscale x 4 x i32> undef, splat (i32 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV8i32 = sdiv <vscale x 8 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i32 = sdiv <vscale x 16 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i16 = sdiv <vscale x 2 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i16 = sdiv <vscale x 4 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i16 = sdiv <vscale x 8 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %NV16i32 = sdiv <vscale x 16 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV2i16 = sdiv <vscale x 2 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV4i16 = sdiv <vscale x 4 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV8i16 = sdiv <vscale x 8 x i16> undef, splat (i16 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV16i16 = sdiv <vscale x 16 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %NV32i16 = sdiv <vscale x 32 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i8 = sdiv <vscale x 2 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i8 = sdiv <vscale x 4 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i8 = sdiv <vscale x 8 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i8 = sdiv <vscale x 16 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %NV32i16 = sdiv <vscale x 32 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV2i8 = sdiv <vscale x 2 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV4i8 = sdiv <vscale x 4 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV8i8 = sdiv <vscale x 8 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV16i8 = sdiv <vscale x 16 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV32i8 = sdiv <vscale x 32 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %NV64i8 = sdiv <vscale x 64 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %NV64i8 = sdiv <vscale x 64 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
@@ -408,41 +408,41 @@ define void @sdiv_uniformconstpow2() {
 
 define void @udiv_uniformconstpow2() {
 ; CHECK-LABEL: 'udiv_uniformconstpow2'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = udiv <2 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2i32 = udiv <2 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = udiv <2 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = udiv <4 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2i16 = udiv <2 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4i16 = udiv <4 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = udiv <2 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = udiv <4 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = udiv <8 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2i8 = udiv <2 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4i8 = udiv <4 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8i8 = udiv <8 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i64 = udiv <vscale x 2 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i64 = udiv <vscale x 4 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %NV2i64 = udiv <vscale x 2 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV4i64 = udiv <vscale x 4 x i64> undef, splat (i64 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV8i64 = udiv <vscale x 8 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i32 = udiv <vscale x 2 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i32 = udiv <vscale x 4 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i32 = udiv <vscale x 8 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %NV2i32 = udiv <vscale x 2 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %NV4i32 = udiv <vscale x 4 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV8i32 = udiv <vscale x 8 x i32> undef, splat (i32 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV16i32 = udiv <vscale x 16 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i16 = udiv <vscale x 2 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i16 = udiv <vscale x 4 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i16 = udiv <vscale x 8 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i16 = udiv <vscale x 16 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %NV2i16 = udiv <vscale x 2 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %NV4i16 = udiv <vscale x 4 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %NV8i16 = udiv <vscale x 8 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV16i16 = udiv <vscale x 16 x i16> undef, splat (i16 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV32i16 = udiv <vscale x 32 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i8 = udiv <vscale x 2 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i8 = udiv <vscale x 4 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i8 = udiv <vscale x 8 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i8 = udiv <vscale x 16 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV32i8 = udiv <vscale x 32 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %NV2i8 = udiv <vscale x 2 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %NV4i8 = udiv <vscale x 4 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %NV8i8 = udiv <vscale x 8 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %NV16i8 = udiv <vscale x 16 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV32i8 = udiv <vscale x 32 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV64i8 = udiv <vscale x 64 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
@@ -488,41 +488,41 @@ define void @udiv_uniformconstpow2() {
 define void @sdiv_uniformconstnegpow2() {
 ; CHECK-LABEL: 'sdiv_uniformconstnegpow2'
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = sdiv <2 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = sdiv <2 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = sdiv <4 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = sdiv <2 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = sdiv <4 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = sdiv <8 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i64 = sdiv <vscale x 2 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i64 = sdiv <vscale x 4 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i64 = sdiv <vscale x 8 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i32 = sdiv <vscale x 2 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i32 = sdiv <vscale x 4 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i32 = sdiv <vscale x 8 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i32 = sdiv <vscale x 16 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i16 = sdiv <vscale x 2 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i16 = sdiv <vscale x 4 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i16 = sdiv <vscale x 8 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i16 = sdiv <vscale x 16 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %NV32i16 = sdiv <vscale x 32 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i8 = sdiv <vscale x 2 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i8 = sdiv <vscale x 4 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i8 = sdiv <vscale x 8 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i8 = sdiv <vscale x 16 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %NV32i8 = sdiv <vscale x 32 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %NV64i8 = sdiv <vscale x 64 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V2i32 = sdiv <2 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 20 for: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V2i16 = sdiv <2 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V4i16 = sdiv <4 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 20 for: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V2i8 = sdiv <2 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V4i8 = sdiv <4 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V8i8 = sdiv <8 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 20 for: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 3 for: %NV2i64 = sdiv <vscale x 2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %NV4i64 = sdiv <vscale x 4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %NV8i64 = sdiv <vscale x 8 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 3 for: %NV2i32 = sdiv <vscale x 2 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 3 for: %NV4i32 = sdiv <vscale x 4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %NV8i32 = sdiv <vscale x 8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %NV16i32 = sdiv <vscale x 16 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 3 for: %NV2i16 = sdiv <vscale x 2 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 3 for: %NV4i16 = sdiv <vscale x 4 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 3 for: %NV8i16 = sdiv <vscale x 8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %NV16i16 = sdiv <vscale x 16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %NV32i16 = sdiv <vscale x 32 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 3 for: %NV2i8 = sdiv <vscale x 2 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 3 for: %NV4i8 = sdiv <vscale x 4 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 3 for: %NV8i8 = sdiv <vscale x 8 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 3 for: %NV16i8 = sdiv <vscale x 16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %NV32i8 = sdiv <vscale x 32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %NV64i8 = sdiv <vscale x 64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %V2i64 = sdiv <2 x i64> undef, splat (i64 -16)
@@ -566,42 +566,42 @@ define void @sdiv_uniformconstnegpow2() {
 
 define void @udiv_uniformconstnegpow2() {
 ; CHECK-LABEL: 'udiv_uniformconstnegpow2'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = udiv <2 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = udiv <4 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = udiv <8 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = udiv <2 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = udiv <4 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = udiv <8 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = udiv <16 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = udiv <2 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = udiv <4 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = udiv <8 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = udiv <16 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = udiv <32 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = udiv <2 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = udiv <4 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = udiv <8 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = udiv <16 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = udiv <32 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = udiv <64 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V2i64 = udiv <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V4i64 = udiv <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %V8i64 = udiv <8 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V2i32 = udiv <2 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V4i32 = udiv <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V8i32 = udiv <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %V16i32 = udiv <16 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V2i16 = udiv <2 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V4i16 = udiv <4 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V8i16 = udiv <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V16i16 = udiv <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %V32i16 = udiv <32 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V2i8 = udiv <2 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V4i8 = udiv <4 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V8i8 = udiv <8 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V16i8 = udiv <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V32i8 = udiv <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %V64i8 = udiv <64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV2i64 = udiv <vscale x 2 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i64 = udiv <vscale x 4 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i64 = udiv <vscale x 8 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %NV4i64 = udiv <vscale x 4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %NV8i64 = udiv <vscale x 8 x i64> undef, splat (i64 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV2i32 = udiv <vscale x 2 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV4i32 = udiv <vscale x 4 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i32 = udiv <vscale x 8 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i32 = udiv <vscale x 16 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %NV8i32 = udiv <vscale x 8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %NV16i32 = udiv <vscale x 16 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV2i16 = udiv <vscale x 2 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV4i16 = udiv <vscale x 4 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV8i16 = udiv <vscale x 8 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i16 = udiv <vscale x 16 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %NV32i16 = udiv <vscale x 32 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %NV16i16 = udiv <vscale x 16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %NV32i16 = udiv <vscale x 32 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV2i8 = udiv <vscale x 2 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV4i8 = udiv <vscale x 4 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV8i8 = udiv <vscale x 8 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV16i8 = udiv <vscale x 16 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %NV32i8 = udiv <vscale x 32 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %NV64i8 = udiv <vscale x 64 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %NV32i8 = udiv <vscale x 32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %NV64i8 = udiv <vscale x 64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %V2i64 = udiv <2 x i64> undef, splat (i64 -16)
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-rem.ll b/llvm/test/Analysis/CostModel/AArch64/sve-rem.ll
index eac8b66bcd216..a96246804fb68 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-rem.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-rem.ll
@@ -167,43 +167,43 @@ define void @urem() {
 
 define void @srem_uniformconst() {
 ; CHECK-LABEL: 'srem_uniformconst'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = srem <2 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = srem <4 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = srem <8 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = srem <2 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = srem <4 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = srem <8 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = srem <16 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = srem <2 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = srem <4 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = srem <8 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = srem <16 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = srem <32 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = srem <2 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = srem <4 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = srem <8 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = srem <16 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = srem <32 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = srem <64 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %NV2i128 = srem <vscale x 2 x i128> undef, splat (i128 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i64 = srem <vscale x 2 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i64 = srem <vscale x 4 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i64 = srem <vscale x 8 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i32 = srem <vscale x 2 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i32 = srem <vscale x 4 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i32 = srem <vscale x 8 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i32 = srem <vscale x 16 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i16 = srem <vscale x 2 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i16 = srem <vscale x 4 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i16 = srem <vscale x 8 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i16 = srem <vscale x 16 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %NV32i16 = srem <vscale x 32 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i8 = srem <vscale x 2 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i8 = srem <vscale x 4 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i8 = srem <vscale x 8 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i8 = srem <vscale x 16 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %NV32i8 = srem <vscale x 32 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %NV64i8 = srem <vscale x 64 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V2i64 = srem <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V4i64 = srem <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of 32 for: %V8i64 = srem <8 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V2i32 = srem <2 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V4i32 = srem <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V8i32 = srem <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %V16i32 = srem <16 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V2i16 = srem <2 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V4i16 = srem <4 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V8i16 = srem <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V16i16 = srem <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %V32i16 = srem <32 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V2i8 = srem <2 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V4i8 = srem <4 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V8i8 = srem <8 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V16i8 = srem <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V32i8 = srem <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %V64i8 = srem <64 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of Invalid for: %NV2i128 = srem <vscale x 2 x i128> undef, splat (i128 7)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %NV2i64 = srem <vscale x 2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %NV4i64 = srem <vscale x 4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of 20 for: %NV8i64 = srem <vscale x 8 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %NV2i32 = srem <vscale x 2 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %NV4i32 = srem <vscale x 4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %NV8i32 = srem <vscale x 8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 20 for: %NV16i32 = srem <vscale x 16 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %NV2i16 = srem <vscale x 2 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %NV4i16 = srem <vscale x 4 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %NV8i16 = srem <vscale x 8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %NV16i16 = srem <vscale x 16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 20 for: %NV32i16 = srem <vscale x 32 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %NV2i8 = srem <vscale x 2 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %NV4i8 = srem <vscale x 4 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %NV8i8 = srem <vscale x 8 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %NV16i8 = srem <vscale x 16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %NV32i8 = srem <vscale x 32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 20 for: %NV64i8 = srem <vscale x 64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %V2i64 = srem <2 x i64> undef, splat (i64 7)
@@ -248,43 +248,43 @@ define void @srem_uniformconst() {
 
 define void @urem_uniformconst() {
 ; CHECK-LABEL: 'urem_uniformconst'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = urem <2 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = urem <4 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = urem <8 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = urem <2 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = urem <4 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = urem <8 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = urem <16 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = urem <2 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = urem <4 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = urem <8 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = urem <16 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = urem <32 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = urem <2 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = urem <4 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = urem <8 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = urem <16 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = urem <32 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = urem <64 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %NV2i128 = urem <vscale x 2 x i128> undef, splat (i128 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i64 = urem <vscale x 2 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i64 = urem <vscale x 4 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i64 = urem <vscale x 8 x i64> undef, splat (i64 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i32 = urem <vscale x 2 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i32 = urem <vscale x 4 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i32 = urem <vscale x 8 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i32 = urem <vscale x 16 x i32> undef, splat (i32 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i16 = urem <vscale x 2 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i16 = urem <vscale x 4 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i16 = urem <vscale x 8 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i16 = urem <vscale x 16 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %NV32i16 = urem <vscale x 32 x i16> undef, splat (i16 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i8 = urem <vscale x 2 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i8 = urem <vscale x 4 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i8 = urem <vscale x 8 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i8 = urem <vscale x 16 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %NV32i8 = urem <vscale x 32 x i8> undef, splat (i8 7)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %NV64i8 = urem <vscale x 64 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V2i64 = urem <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V4i64 = urem <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of 32 for: %V8i64 = urem <8 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V2i32 = urem <2 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V4i32 = urem <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V8i32 = urem <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 32 for: %V16i32 = urem <16 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V2i16 = urem <2 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V4i16 = urem <4 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V8i16 = urem <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V16i16 = urem <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 32 for: %V32i16 = urem <32 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V2i8 = urem <2 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V4i8 = urem <4 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V8i8 = urem <8 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V16i8 = urem <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V32i8 = urem <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 32 for: %V64i8 = urem <64 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of Invalid for: %NV2i128 = urem <vscale x 2 x i128> undef, splat (i128 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %NV2i64 = urem <vscale x 2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %NV4i64 = urem <vscale x 4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %NV8i64 = urem <vscale x 8 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %NV2i32 = urem <vscale x 2 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %NV4i32 = urem <vscale x 4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %NV8i32 = urem <vscale x 8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %NV16i32 = urem <vscale x 16 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %NV2i16 = urem <vscale x 2 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %NV4i16 = urem <vscale x 4 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %NV8i16 = urem <vscale x 8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %NV16i16 = urem <vscale x 16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %NV32i16 = urem <vscale x 32 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %NV2i8 = urem <vscale x 2 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %NV4i8 = urem <vscale x 4 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %NV8i8 = urem <vscale x 8 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %NV16i8 = urem <vscale x 16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %NV32i8 = urem <vscale x 32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %NV64i8 = urem <vscale x 64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %V2i64 = urem <2 x i64> undef, splat (i64 7)
@@ -330,42 +330,42 @@ define void @urem_uniformconst() {
 define void @srem_uniformconstpow2() {
 ; CHECK-LABEL: 'srem_uniformconstpow2'
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2i64 = srem <2 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = srem <4 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = srem <8 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2i32 = srem <2 x i32> undef, splat (i32 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4i32 = srem <4 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = srem <8 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = srem <16 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2i16 = srem <2 x i16> undef, splat (i16 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4i16 = srem <4 x i16> undef, splat (i16 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8i16 = srem <8 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = srem <16 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = srem <32 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2i8 = srem <2 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4i8 = srem <4 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8i8 = srem <8 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V16i8 = srem <16 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = srem <32 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = srem <64 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %NV2i128 = srem <vscale x 2 x i128> undef, splat (i128 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i64 = srem <vscale x 2 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V64i8 = srem <64 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of Invalid for: %NV2i128 = srem <vscale x 2 x i128> undef, splat (i128 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV2i64 = srem <vscale x 2 x i64> undef, splat (i64 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV4i64 = srem <vscale x 4 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i64 = srem <vscale x 8 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i32 = srem <vscale x 2 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i32 = srem <vscale x 4 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %NV8i64 = srem <vscale x 8 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV2i32 = srem <vscale x 2 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV4i32 = srem <vscale x 4 x i32> undef, splat (i32 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV8i32 = srem <vscale x 8 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i32 = srem <vscale x 16 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i16 = srem <vscale x 2 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i16 = srem <vscale x 4 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i16 = srem <vscale x 8 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %NV16i32 = srem <vscale x 16 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV2i16 = srem <vscale x 2 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV4i16 = srem <vscale x 4 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV8i16 = srem <vscale x 8 x i16> undef, splat (i16 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV16i16 = srem <vscale x 16 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %NV32i16 = srem <vscale x 32 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i8 = srem <vscale x 2 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i8 = srem <vscale x 4 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i8 = srem <vscale x 8 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i8 = srem <vscale x 16 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %NV32i16 = srem <vscale x 32 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV2i8 = srem <vscale x 2 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV4i8 = srem <vscale x 4 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV8i8 = srem <vscale x 8 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV16i8 = srem <vscale x 16 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV32i8 = srem <vscale x 32 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %NV64i8 = srem <vscale x 64 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %NV64i8 = srem <vscale x 64 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %V2i64 = srem <2 x i64> undef, splat (i64 16)
@@ -410,42 +410,42 @@ define void @srem_uniformconstpow2() {
 
 define void @urem_uniformconstpow2() {
 ; CHECK-LABEL: 'urem_uniformconstpow2'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = urem <2 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = urem <4 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2i64 = urem <2 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V4i64 = urem <4 x i64> undef, splat (i64 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8i64 = urem <8 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = urem <2 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = urem <4 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = urem <8 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2i32 = urem <2 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4i32 = urem <4 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V8i32 = urem <8 x i32> undef, splat (i32 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V16i32 = urem <16 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = urem <2 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = urem <4 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = urem <8 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = urem <16 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2i16 = urem <2 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4i16 = urem <4 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8i16 = urem <8 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V16i16 = urem <16 x i16> undef, splat (i16 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V32i16 = urem <32 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = urem <2 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = urem <4 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = urem <8 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = urem <16 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = urem <32 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2i8 = urem <2 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4i8 = urem <4 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8i8 = urem <8 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V16i8 = urem <16 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V32i8 = urem <32 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V64i8 = urem <64 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %NV2i128 = urem <vscale x 2 x i128> undef, splat (i128 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i64 = urem <vscale x 2 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i64 = urem <vscale x 4 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found costs of Invalid for: %NV2i128 = urem <vscale x 2 x i128> undef, splat (i128 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %NV2i64 = urem <vscale x 2 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV4i64 = urem <vscale x 4 x i64> undef, splat (i64 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV8i64 = urem <vscale x 8 x i64> undef, splat (i64 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i32 = urem <vscale x 2 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i32 = urem <vscale x 4 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i32 = urem <vscale x 8 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %NV2i32 = urem <vscale x 2 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %NV4i32 = urem <vscale x 4 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV8i32 = urem <vscale x 8 x i32> undef, splat (i32 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV16i32 = urem <vscale x 16 x i32> undef, splat (i32 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i16 = urem <vscale x 2 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i16 = urem <vscale x 4 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i16 = urem <vscale x 8 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i16 = urem <vscale x 16 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %NV2i16 = urem <vscale x 2 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %NV4i16 = urem <vscale x 4 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %NV8i16 = urem <vscale x 8 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV16i16 = urem <vscale x 16 x i16> undef, splat (i16 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV32i16 = urem <vscale x 32 x i16> undef, splat (i16 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i8 = urem <vscale x 2 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i8 = urem <vscale x 4 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i8 = urem <vscale x 8 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i8 = urem <vscale x 16 x i8> undef, splat (i8 16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV32i8 = urem <vscale x 32 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %NV2i8 = urem <vscale x 2 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %NV4i8 = urem <vscale x 4 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %NV8i8 = urem <vscale x 8 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %NV16i8 = urem <vscale x 16 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV32i8 = urem <vscale x 32 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV64i8 = urem <vscale x 64 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
@@ -492,42 +492,42 @@ define void @urem_uniformconstpow2() {
 define void @srem_uniformconstnegpow2() {
 ; CHECK-LABEL: 'srem_uniformconstnegpow2'
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2i64 = srem <2 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = srem <4 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = srem <8 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V4i64 = srem <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V8i64 = srem <8 x i64> undef, splat (i64 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2i32 = srem <2 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4i32 = srem <4 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = srem <8 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = srem <16 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V8i32 = srem <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V16i32 = srem <16 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2i16 = srem <2 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4i16 = srem <4 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8i16 = srem <8 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = srem <16 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = srem <32 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V16i16 = srem <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V32i16 = srem <32 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2i8 = srem <2 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4i8 = srem <4 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8i8 = srem <8 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V16i8 = srem <16 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = srem <32 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = srem <64 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %NV2i128 = srem <vscale x 2 x i128> undef, splat (i128 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i64 = srem <vscale x 2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V32i8 = srem <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V64i8 = srem <64 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of Invalid for: %NV2i128 = srem <vscale x 2 x i128> undef, splat (i128 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV2i64 = srem <vscale x 2 x i64> undef, splat (i64 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV4i64 = srem <vscale x 4 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i64 = srem <vscale x 8 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i32 = srem <vscale x 2 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i32 = srem <vscale x 4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %NV8i64 = srem <vscale x 8 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV2i32 = srem <vscale x 2 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV4i32 = srem <vscale x 4 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV8i32 = srem <vscale x 8 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i32 = srem <vscale x 16 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i16 = srem <vscale x 2 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i16 = srem <vscale x 4 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i16 = srem <vscale x 8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %NV16i32 = srem <vscale x 16 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV2i16 = srem <vscale x 2 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV4i16 = srem <vscale x 4 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV8i16 = srem <vscale x 8 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV16i16 = srem <vscale x 16 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %NV32i16 = srem <vscale x 32 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i8 = srem <vscale x 2 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i8 = srem <vscale x 4 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i8 = srem <vscale x 8 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i8 = srem <vscale x 16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %NV32i16 = srem <vscale x 32 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV2i8 = srem <vscale x 2 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV4i8 = srem <vscale x 4 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV8i8 = srem <vscale x 8 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %NV16i8 = srem <vscale x 16 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %NV32i8 = srem <vscale x 32 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %NV64i8 = srem <vscale x 64 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %NV64i8 = srem <vscale x 64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %V2i64 = srem <2 x i64> undef, splat (i64 -16)
@@ -572,43 +572,43 @@ define void @srem_uniformconstnegpow2() {
 
 define void @urem_uniformconstnegpow2() {
 ; CHECK-LABEL: 'urem_uniformconstnegpow2'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = urem <2 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = urem <4 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = urem <8 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = urem <2 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = urem <4 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = urem <8 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = urem <16 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = urem <2 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = urem <4 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = urem <8 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = urem <16 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = urem <32 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = urem <2 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = urem <4 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = urem <8 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = urem <16 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = urem <32 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = urem <64 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %NV2i128 = urem <vscale x 2 x i128> undef, splat (i128 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i64 = urem <vscale x 2 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i64 = urem <vscale x 4 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i64 = urem <vscale x 8 x i64> undef, splat (i64 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i32 = urem <vscale x 2 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i32 = urem <vscale x 4 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i32 = urem <vscale x 8 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i32 = urem <vscale x 16 x i32> undef, splat (i32 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i16 = urem <vscale x 2 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i16 = urem <vscale x 4 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i16 = urem <vscale x 8 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i16 = urem <vscale x 16 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %NV32i16 = urem <vscale x 32 x i16> undef, splat (i16 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i8 = urem <vscale x 2 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i8 = urem <vscale x 4 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i8 = urem <vscale x 8 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i8 = urem <vscale x 16 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %NV32i8 = urem <vscale x 32 x i8> undef, splat (i8 -16)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %NV64i8 = urem <vscale x 64 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V2i64 = urem <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V4i64 = urem <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 32 for: %V8i64 = urem <8 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V2i32 = urem <2 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V4i32 = urem <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V8i32 = urem <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 32 for: %V16i32 = urem <16 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V2i16 = urem <2 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V4i16 = urem <4 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V8i16 = urem <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V16i16 = urem <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 32 for: %V32i16 = urem <32 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V2i8 = urem <2 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V4i8 = urem <4 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V8i8 = urem <8 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %V16i8 = urem <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V32i8 = urem <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 32 for: %V64i8 = urem <64 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of Invalid for: %NV2i128 = urem <vscale x 2 x i128> undef, splat (i128 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %NV2i64 = urem <vscale x 2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %NV4i64 = urem <vscale x 4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %NV8i64 = urem <vscale x 8 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %NV2i32 = urem <vscale x 2 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %NV4i32 = urem <vscale x 4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %NV8i32 = urem <vscale x 8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %NV16i32 = urem <vscale x 16 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %NV2i16 = urem <vscale x 2 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %NV4i16 = urem <vscale x 4 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %NV8i16 = urem <vscale x 8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %NV16i16 = urem <vscale x 16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %NV32i16 = urem <vscale x 32 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %NV2i8 = urem <vscale x 2 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %NV4i8 = urem <vscale x 4 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %NV8i8 = urem <vscale x 8 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %NV16i8 = urem <vscale x 16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %NV32i8 = urem <vscale x 32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %NV64i8 = urem <vscale x 64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %V2i64 = urem <2 x i64> undef, splat (i64 -16)
diff --git a/llvm/test/Analysis/CostModel/AArch64/vec3-ops.ll b/llvm/test/Analysis/CostModel/AArch64/vec3-ops.ll
index f234341b0fd86..48bd549e200ec 100644
--- a/llvm/test/Analysis/CostModel/AArch64/vec3-ops.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/vec3-ops.ll
@@ -206,8 +206,8 @@ define void @vec3_float(<3 x float> %a, <3 x float> %b, ptr %src, ptr %dst) {
 define void @vec3_half(<3 x half> %a, <3 x half> %b, ptr %src, ptr %dst) {
 ; CHECK-LABEL: 'vec3_half'
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %l = load <3 x half>, ptr %src, align 1
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %add = fadd <3 x half> %l, %b
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %sub = fsub <3 x half> %add, %a
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:6 SizeLat:4 for: %add = fadd <3 x half> %l, %b
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:4 Lat:6 SizeLat:4 for: %sub = fsub <3 x half> %add, %a
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <3 x half> %sub, ptr %dst, align 1
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
index 67e6902b5d32a..daa81b00aa59c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
@@ -624,44 +624,44 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32
 ; MINSIZE-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; MINSIZE:       [[VECTOR_PH]]:
 ; MINSIZE-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; MINSIZE-NEXT:    [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
+; MINSIZE-NEXT:    [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
 ; MINSIZE-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; MINSIZE-NEXT:    [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
+; MINSIZE-NEXT:    [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
 ; MINSIZE-NEXT:    [[TMP7:%.*]] = sub i64 15, [[TMP6]]
 ; MINSIZE-NEXT:    [[TMP8:%.*]] = icmp ugt i64 15, [[TMP6]]
 ; MINSIZE-NEXT:    [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
-; MINSIZE-NEXT:    [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 15)
-; MINSIZE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[A]], i64 0
-; MINSIZE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
-; MINSIZE-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B]], i64 0
-; MINSIZE-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
-; MINSIZE-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[C]], i64 0
-; MINSIZE-NEXT:    [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT3]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
-; MINSIZE-NEXT:    [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8()
-; MINSIZE-NEXT:    [[TMP11:%.*]] = mul <vscale x 16 x i8> [[TMP10]], splat (i8 1)
-; MINSIZE-NEXT:    [[INDUCTION:%.*]] = add <vscale x 16 x i8> zeroinitializer, [[TMP11]]
+; MINSIZE-NEXT:    [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 15)
+; MINSIZE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i8> poison, i8 [[A]], i64 0
+; MINSIZE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+; MINSIZE-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i8> poison, i8 [[B]], i64 0
+; MINSIZE-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i8> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+; MINSIZE-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i8> poison, i8 [[C]], i64 0
+; MINSIZE-NEXT:    [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x i8> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+; MINSIZE-NEXT:    [[TMP17:%.*]] = call <vscale x 2 x i8> @llvm.stepvector.nxv2i8()
+; MINSIZE-NEXT:    [[TMP18:%.*]] = mul <vscale x 2 x i8> [[TMP17]], splat (i8 1)
+; MINSIZE-NEXT:    [[INDUCTION:%.*]] = add <vscale x 2 x i8> zeroinitializer, [[TMP18]]
 ; MINSIZE-NEXT:    [[TMP12:%.*]] = trunc i64 [[TMP1]] to i8
-; MINSIZE-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[TMP12]], i64 0
-; MINSIZE-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+; MINSIZE-NEXT:    [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 2 x i8> poison, i8 [[TMP12]], i64 0
+; MINSIZE-NEXT:    [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 2 x i8> [[BROADCAST_SPLATINSERT5]], <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
 ; MINSIZE-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; MINSIZE:       [[VECTOR_BODY]]:
 ; MINSIZE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; MINSIZE-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; MINSIZE-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 16 x i8> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; MINSIZE-NEXT:    [[TMP15:%.*]] = mul <vscale x 16 x i8> [[BROADCAST_SPLAT]], [[VEC_IND]]
-; MINSIZE-NEXT:    [[TMP16:%.*]] = lshr <vscale x 16 x i8> [[VEC_IND]], splat (i8 1)
-; MINSIZE-NEXT:    [[TMP17:%.*]] = mul <vscale x 16 x i8> [[TMP16]], [[BROADCAST_SPLAT2]]
-; MINSIZE-NEXT:    [[TMP18:%.*]] = add <vscale x 16 x i8> [[TMP17]], [[TMP15]]
-; MINSIZE-NEXT:    [[TMP19:%.*]] = lshr <vscale x 16 x i8> [[VEC_IND]], splat (i8 2)
-; MINSIZE-NEXT:    [[TMP20:%.*]] = mul <vscale x 16 x i8> [[TMP19]], [[BROADCAST_SPLAT4]]
-; MINSIZE-NEXT:    [[TMP21:%.*]] = add <vscale x 16 x i8> [[TMP18]], [[TMP20]]
+; MINSIZE-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; MINSIZE-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 2 x i8> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; MINSIZE-NEXT:    [[TMP10:%.*]] = mul <vscale x 2 x i8> [[BROADCAST_SPLAT]], [[VEC_IND]]
+; MINSIZE-NEXT:    [[TMP11:%.*]] = lshr <vscale x 2 x i8> [[VEC_IND]], splat (i8 1)
+; MINSIZE-NEXT:    [[TMP19:%.*]] = mul <vscale x 2 x i8> [[TMP11]], [[BROADCAST_SPLAT2]]
+; MINSIZE-NEXT:    [[TMP13:%.*]] = add <vscale x 2 x i8> [[TMP19]], [[TMP10]]
+; MINSIZE-NEXT:    [[TMP14:%.*]] = lshr <vscale x 2 x i8> [[VEC_IND]], splat (i8 2)
+; MINSIZE-NEXT:    [[TMP15:%.*]] = mul <vscale x 2 x i8> [[TMP14]], [[BROADCAST_SPLAT4]]
+; MINSIZE-NEXT:    [[TMP16:%.*]] = add <vscale x 2 x i8> [[TMP13]], [[TMP15]]
 ; MINSIZE-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]]
-; MINSIZE-NEXT:    call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP21]], ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]])
+; MINSIZE-NEXT:    call void @llvm.masked.store.nxv2i8.p0(<vscale x 2 x i8> [[TMP16]], ptr [[TMP22]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
 ; MINSIZE-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
-; MINSIZE-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP9]])
-; MINSIZE-NEXT:    [[TMP24:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
+; MINSIZE-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP9]])
+; MINSIZE-NEXT:    [[TMP24:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
 ; MINSIZE-NEXT:    [[TMP23:%.*]] = xor i1 [[TMP24]], true
-; MINSIZE-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 16 x i8> [[VEC_IND]], [[DOTSPLAT]]
+; MINSIZE-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 2 x i8> [[VEC_IND]], [[BROADCAST_SPLAT6]]
 ; MINSIZE-NEXT:    br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; MINSIZE:       [[MIDDLE_BLOCK]]:
 ; MINSIZE-NEXT:    br label %[[FOR_COND_CLEANUP:.*]]



More information about the llvm-commits mailing list