[llvm] [RISCV][CostModel] Updates reduction and shuffle cost (PR #77342)

Mon Jan 8 09:01:14 PST 2024

https://github.com/arcbbb created https://github.com/llvm/llvm-project/pull/77342

- Make VMV_S_* and VMV_*_S cost independent of LMUL
- Uses getRISCVInstructionCost() in reduction cost Add SplitCost for lmul larger than 8. e.g. The cost of vredsum on [vscale x 16 x i64] will be the cost of vadd on [vscale x 8 x i64] plus the cost of vredsum on [vscale x 8 x i64].

>From cb68022c2af9adcf4a3fa653f939a9525d3a2383 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Sun, 7 Jan 2024 23:55:34 -0800
Subject: [PATCH] [RISCV][CostModel] Updates reduction and shuffle cost

- Make VMV_S_* and VMV_*_S cost independent of LMUL
- Uses getRISCVInstructionCost() in reduction cost
  Add SplitCost for lmul larger than 8.
  e.g. The cost of vredsum on [vscale x 16 x i64] will be
       the cost of vadd on [vscale x 8 x i64] plus
       the cost of vredsum on [vscale x 8 x i64].
---
 .../Target/RISCV/RISCVTargetTransformInfo.cpp | 145 ++++++++---
 .../Analysis/CostModel/RISCV/reduce-add.ll    | 102 ++++----
 .../Analysis/CostModel/RISCV/reduce-and.ll    |  70 +++---
 .../Analysis/CostModel/RISCV/reduce-fadd.ll   | 108 ++++-----
 .../Analysis/CostModel/RISCV/reduce-max.ll    | 140 +++++------
 .../Analysis/CostModel/RISCV/reduce-min.ll    | 140 +++++------
 .../Analysis/CostModel/RISCV/reduce-or.ll     | 114 ++++-----
 .../CostModel/RISCV/reduce-scalable-fp.ll     | 146 +++++------
 .../CostModel/RISCV/reduce-scalable-int.ll    | 228 +++++++++---------
 .../Analysis/CostModel/RISCV/reduce-xor.ll    | 102 ++++----
 .../CostModel/RISCV/rvv-intrinsics.ll         |   8 +-
 .../Analysis/CostModel/RISCV/rvv-shuffle.ll   |  28 +--
 .../CostModel/RISCV/shuffle-broadcast.ll      |  12 +-
 .../RISCV/shuffle-insert_subvector.ll         |  76 +++---
 .../CostModel/RISCV/shuffle-reverse.ll        |  12 +-
 .../CostModel/RISCV/shuffle-select.ll         |   8 +-
 .../RISCV/riscv-vector-reverse.ll             |  20 +-
 17 files changed, 764 insertions(+), 695 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index b3916c98700519..6c143a762b0c03 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -46,6 +46,9 @@ RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
   InstructionCost Cost = 0;
   for (auto Op : OpCodes) {
     switch (Op) {
+    case RISCV::SLT:
+      Cost += 1;
+      break;
     case RISCV::VRGATHER_VI:
       Cost += TLI->getVRGatherVICost(VT);
       break;
@@ -84,8 +87,14 @@ RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
       Cost += VL;
       break;
     }
+    case RISCV::VMV_X_S:
+    case RISCV::VFMV_F_S:
+      Cost += 1;
+      break;
     case RISCV::VMV_S_X:
-      // FIXME: VMV_S_X doesn't use LMUL, the cost should be 1
+    case RISCV::VFMV_S_F:
+      Cost += 1;
+      break;
     default:
       Cost += LMULCost;
     }
@@ -444,9 +453,8 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
     // vmv.s.x      v0, a0
     // vmerge.vvm   v8, v9, v8, v0
     return LT.first *
-           (TLI->getLMULCost(LT.second) + // FIXME: should be 1 for li
-            getRISCVInstructionCost({RISCV::VMV_S_X, RISCV::VMERGE_VVM},
-                                    LT.second, CostKind));
+           (1 + getRISCVInstructionCost({RISCV::VMV_S_X, RISCV::VMERGE_VVM},
+                                        LT.second, CostKind));
   }
   case TTI::SK_Broadcast: {
     bool HasScalar = (Args.size() > 0) && (Operator::getOpcode(Args[0]) ==
@@ -459,9 +467,8 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
         //   vmv.v.x v8, a0
         //   vmsne.vi v0, v8, 0
         return LT.first *
-               (TLI->getLMULCost(LT.second) + // FIXME: should be 1 for andi
-                getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
-                                        LT.second, CostKind));
+               (1 + getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
+                                            LT.second, CostKind));
       }
       // Example sequence:
       //   vsetivli  zero, 2, e8, mf8, ta, mu (ignored)
@@ -473,12 +480,10 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
       //   vmsne.vi  v0, v8, 0
 
       return LT.first *
-             (TLI->getLMULCost(LT.second) + // FIXME: this should be 1 for andi
-              TLI->getLMULCost(
-                  LT.second) + // FIXME: vmv.x.s is the same as extractelement
-              getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM,
-                                       RISCV::VMV_V_X, RISCV::VMSNE_VI},
-                                      LT.second, CostKind));
+             (1 + getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM,
+                                           RISCV::VMV_X_S, RISCV::VMV_V_X,
+                                           RISCV::VMSNE_VI},
+                                          LT.second, CostKind));
     }
 
     if (HasScalar) {
@@ -523,9 +528,9 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
     if (LT.second.isFixedLengthVector())
       // vrsub.vi has a 5 bit immediate field, otherwise an li suffices
       LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
-    // FIXME: replace the constant `2` below with cost of {VID_V,VRSUB_VX}
-    InstructionCost GatherCost =
-        2 + getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second, CostKind);
+    InstructionCost GatherCost = getRISCVInstructionCost(
+        {RISCV::VID_V, RISCV::VRSUB_VX, RISCV::VRGATHER_VV}, LT.second,
+        CostKind);
     // Mask operation additionally required extend and truncate
     InstructionCost ExtendCost = Tp->getElementType()->isIntegerTy(1) ? 3 : 0;
     return LT.first * (LenCost + GatherCost + ExtendCost);
@@ -1358,19 +1363,53 @@ RISCVTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
     return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind);
 
   std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
-  if (Ty->getElementType()->isIntegerTy(1))
-    // vcpop sequences, see vreduction-mask.ll.  umax, smin actually only
-    // cost 2, but we don't have enough info here so we slightly over cost.
-    return (LT.first - 1) + 3;
+  std::array<unsigned, 3> Opcodes;
+  if (Ty->getElementType()->isIntegerTy(1)) {
+    // vcpop sequences, see vreduction-mask.ll.
+    if ((IID == Intrinsic::umax) || (IID == Intrinsic::smin))
+      Opcodes = {RISCV::VMNAND_MM, RISCV::VCPOP_M, RISCV::SLT};
+    else
+      Opcodes = {RISCV::VCPOP_M, RISCV::SLT};
+    return (LT.first - 1) +
+           getRISCVInstructionCost(Opcodes, LT.second, CostKind);
+  }
 
   // IR Reduction is composed by two vmv and one rvv reduction instruction.
-  InstructionCost BaseCost = 2;
-
-  if (CostKind == TTI::TCK_CodeSize)
-    return (LT.first - 1) + BaseCost;
-
-  unsigned VL = getEstimatedVLFor(Ty);
-  return (LT.first - 1) + BaseCost + Log2_32_Ceil(VL);
+  unsigned SplitOp;
+  switch (IID) {
+  default:
+    llvm_unreachable("Unsupported intrinsic");
+  case Intrinsic::smax:
+    SplitOp = RISCV::VMAX_VV;
+    Opcodes = {RISCV::VMV_S_X, RISCV::VREDMAX_VS, RISCV::VMV_X_S};
+    break;
+  case Intrinsic::smin:
+    SplitOp = RISCV::VMIN_VV;
+    Opcodes = {RISCV::VMV_S_X, RISCV::VREDMIN_VS, RISCV::VMV_X_S};
+    break;
+  case Intrinsic::umax:
+    SplitOp = RISCV::VMAXU_VV;
+    Opcodes = {RISCV::VMV_S_X, RISCV::VREDMAXU_VS, RISCV::VMV_X_S};
+    break;
+  case Intrinsic::umin:
+    SplitOp = RISCV::VMINU_VV;
+    Opcodes = {RISCV::VMV_S_X, RISCV::VREDMINU_VS, RISCV::VMV_X_S};
+    break;
+  case Intrinsic::maxnum:
+    SplitOp = RISCV::VFMAX_VV;
+    Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
+    break;
+  case Intrinsic::minnum:
+    SplitOp = RISCV::VFMIN_VV;
+    Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
+    break;
+  }
+  // Add a cost for data larger than LMUL8
+  InstructionCost SplitCost =
+      (LT.first > 1) ? (LT.first - 1) *
+                           getRISCVInstructionCost(SplitOp, LT.second, CostKind)
+                     : 0;
+  return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
 }
 
 InstructionCost
@@ -1392,20 +1431,50 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
     return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
 
   std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
-  if (Ty->getElementType()->isIntegerTy(1))
+  std::array<unsigned, 3> Opcodes;
+  if (Ty->getElementType()->isIntegerTy(1)) {
     // vcpop sequences, see vreduction-mask.ll
-    return (LT.first - 1) + (ISD == ISD::AND ? 3 : 2);
+    if (ISD == ISD::AND)
+      Opcodes = {RISCV::VMNAND_MM, RISCV::VCPOP_M, RISCV::SLT};
+    else
+      Opcodes = {RISCV::VCPOP_M, RISCV::SLT};
+    return (LT.first - 1) +
+           getRISCVInstructionCost(Opcodes, LT.second, CostKind);
+  }
 
   // IR Reduction is composed by two vmv and one rvv reduction instruction.
-  InstructionCost BaseCost = 2;
-
-  if (CostKind == TTI::TCK_CodeSize)
-    return (LT.first - 1) + BaseCost;
-
-  unsigned VL = getEstimatedVLFor(Ty);
-  if (TTI::requiresOrderedReduction(FMF))
-    return (LT.first - 1) + BaseCost + VL;
-  return (LT.first - 1) + BaseCost + Log2_32_Ceil(VL);
+  unsigned SplitOp;
+  switch (ISD) {
+  case ISD::ADD:
+    SplitOp = RISCV::VADD_VV;
+    Opcodes = {RISCV::VMV_S_X, RISCV::VREDSUM_VS, RISCV::VMV_X_S};
+    break;
+  case ISD::OR:
+    SplitOp = RISCV::VOR_VV;
+    Opcodes = {RISCV::VMV_S_X, RISCV::VREDOR_VS, RISCV::VMV_X_S};
+    break;
+  case ISD::XOR:
+    SplitOp = RISCV::VXOR_VV;
+    Opcodes = {RISCV::VMV_S_X, RISCV::VREDXOR_VS, RISCV::VMV_X_S};
+    break;
+  case ISD::AND:
+    SplitOp = RISCV::VAND_VV;
+    Opcodes = {RISCV::VMV_S_X, RISCV::VREDAND_VS, RISCV::VMV_X_S};
+    break;
+  case ISD::FADD:
+    SplitOp = RISCV::VFADD_VV;
+    if (TTI::requiresOrderedReduction(FMF))
+      Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDOSUM_VS, RISCV::VFMV_F_S};
+    else
+      Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDUSUM_VS, RISCV::VFMV_F_S};
+    break;
+  }
+  // Add a cost for data larger than LMUL8
+  InstructionCost SplitCost =
+      (LT.first > 1) ? (LT.first - 1) *
+                           getRISCVInstructionCost(SplitOp, LT.second, CostKind)
+                     : 0;
+  return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
 }
 
 InstructionCost RISCVTTIImpl::getExtendedReductionCost(
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
index 6fe098628ea078..ed9d71cad0be61 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
@@ -6,25 +6,25 @@
 
 define i32 @reduce_i1(i32 %arg) {
 ; CHECK-LABEL: 'reduce_i1'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.add.v2i1(<2 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.vector.reduce.add.v8i1(<8 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.vector.reduce.add.v16i1(<16 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.add.v2i1(<2 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.add.v8i1(<8 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i1 @llvm.vector.reduce.add.v16i1(<16 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i1'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.add.v2i1(<2 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.vector.reduce.add.v8i1(<8 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.vector.reduce.add.v16i1(<16 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.add.v2i1(<2 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.add.v8i1(<8 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i1 @llvm.vector.reduce.add.v16i1(<16 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> undef)
@@ -51,14 +51,14 @@ define i32 @reduce_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
@@ -85,14 +85,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
@@ -115,18 +115,18 @@ define i32 @reduce_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef)
@@ -148,19 +148,19 @@ define i32 @reduce_i64(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
index 3e2344bb41aa3e..4a4394e9f0f0da 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
@@ -60,14 +60,14 @@ define i32 @reduce_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> undef)
@@ -94,14 +94,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.and.v1i16(<1 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i16 @llvm.vector.reduce.and.v128i16(<128 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i16 @llvm.vector.reduce.and.v1i16(<1 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i16 @llvm.vector.reduce.and.v128i16(<128 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.and.v1i16(<1 x i16> undef)
@@ -124,18 +124,18 @@ define i32 @reduce_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.and.v64i32(<64 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.and.v128i32(<128 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i32 @llvm.vector.reduce.and.v128i32(<128 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.and.v1i32(<1 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i32 @llvm.vector.reduce.and.v64i32(<64 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i32 @llvm.vector.reduce.and.v128i32(<128 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i32 @llvm.vector.reduce.and.v1i32(<1 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i32 @llvm.vector.reduce.and.v64i32(<64 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i32 @llvm.vector.reduce.and.v128i32(<128 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.and.v1i32(<1 x i32> undef)
@@ -157,19 +157,19 @@ define i32 @reduce_i64(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.and.v32i64(<32 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.and.v64i64(<64 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.and.v128i64(<128 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i64 @llvm.vector.reduce.and.v64i64(<64 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i64 @llvm.vector.reduce.and.v128i64(<128 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i64 @llvm.vector.reduce.and.v32i64(<32 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i64 @llvm.vector.reduce.and.v64i64(<64 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V128 = call i64 @llvm.vector.reduce.and.v128i64(<128 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i64 @llvm.vector.reduce.and.v32i64(<32 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i64 @llvm.vector.reduce.and.v64i64(<64 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i64 @llvm.vector.reduce.and.v128i64(<128 x i64> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
index e8ef6e92204311..2300ba3a3c0962 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
@@ -15,14 +15,14 @@ define void @reduce_fadd_half() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fadd_half'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call fast half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call fast half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef)
@@ -45,18 +45,18 @@ define void @reduce_fadd_float() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v32 = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call fast float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fadd_float'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call fast float @llvm.vector.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32 = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call fast float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call fast float @llvm.vector.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32 = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call fast float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef)
@@ -78,19 +78,19 @@ define void @reduce_fadd_double() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call fast double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call fast double @llvm.vector.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v32 = call fast double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128 = call fast double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call fast double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fadd_double'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call fast double @llvm.vector.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call fast double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call fast double @llvm.vector.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32 = call fast double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V128 = call fast double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call fast double @llvm.vector.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call fast double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call fast double @llvm.vector.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32 = call fast double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call fast double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef)
@@ -117,14 +117,14 @@ define void @reduce_oredered_fadd_half() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_oredered_fadd_half'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef)
@@ -147,18 +147,18 @@ define void @reduce_oredered_fadd_float() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16 = call float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v32 = call float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64 = call float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 131 for instruction: %V128 = call float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %V128 = call float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_oredered_fadd_float'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call float @llvm.vector.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32 = call float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call float @llvm.vector.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32 = call float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef)
@@ -180,19 +180,19 @@ define void @reduce_oredered_fadd_double() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8 = call double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16 = call double @llvm.vector.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v32 = call double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %V64 = call double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 133 for instruction: %V128 = call double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V64 = call double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V128 = call double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_oredered_fadd_double'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call double @llvm.vector.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call double @llvm.vector.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32 = call double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V128 = call double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call double @llvm.vector.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call double @llvm.vector.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32 = call double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
index c21b8520112cf8..11fa50d355e833 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
@@ -51,14 +51,14 @@ define i32 @reduce_umax_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umax_i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> undef)
@@ -85,14 +85,14 @@ define i32 @reduce_umax_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umax_i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.umax.v1i16(<1 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i16 @llvm.vector.reduce.umax.v1i16(<1 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.umax.v1i16(<1 x i16> undef)
@@ -115,18 +115,18 @@ define i32 @reduce_umax_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.umax.v64i32(<64 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.umax.v128i32(<128 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i32 @llvm.vector.reduce.umax.v128i32(<128 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umax_i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.umax.v1i32(<1 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i32 @llvm.vector.reduce.umax.v64i32(<64 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i32 @llvm.vector.reduce.umax.v128i32(<128 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i32 @llvm.vector.reduce.umax.v1i32(<1 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i32 @llvm.vector.reduce.umax.v64i32(<64 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i32 @llvm.vector.reduce.umax.v128i32(<128 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.umax.v1i32(<1 x i32> undef)
@@ -148,19 +148,19 @@ define i32 @reduce_umax_i64(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.umax.v32i64(<32 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.umax.v64i64(<64 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.umax.v128i64(<128 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i64 @llvm.vector.reduce.umax.v64i64(<64 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i64 @llvm.vector.reduce.umax.v128i64(<128 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umax_i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i64 @llvm.vector.reduce.umax.v32i64(<32 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i64 @llvm.vector.reduce.umax.v64i64(<64 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V128 = call i64 @llvm.vector.reduce.umax.v128i64(<128 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i64 @llvm.vector.reduce.umax.v32i64(<32 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i64 @llvm.vector.reduce.umax.v64i64(<64 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i64 @llvm.vector.reduce.umax.v128i64(<128 x i64> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
@@ -221,14 +221,14 @@ define i32 @reduce_smax_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smax_i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.smax.v1i8(<1 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i8 @llvm.vector.reduce.smax.v1i8(<1 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.smax.v1i8(<1 x i8> undef)
@@ -255,14 +255,14 @@ define i32 @reduce_smax_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smax_i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.smax.v1i16(<1 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i16 @llvm.vector.reduce.smax.v1i16(<1 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.smax.v1i16(<1 x i16> undef)
@@ -285,18 +285,18 @@ define i32 @reduce_smax_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smax_i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.smax.v1i32(<1 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i32 @llvm.vector.reduce.smax.v1i32(<1 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.smax.v1i32(<1 x i32> undef)
@@ -318,19 +318,19 @@ define i32 @reduce_smax_i64(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.smax.v128i64(<128 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i64 @llvm.vector.reduce.smax.v128i64(<128 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smax_i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V128 = call i64 @llvm.vector.reduce.smax.v128i64(<128 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i64 @llvm.vector.reduce.smax.v128i64(<128 x i64> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
index 941c3cf8a999c8..416199a582f2a9 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
@@ -51,14 +51,14 @@ define i32 @reduce_umin_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umin_i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.umin.v1i8(<1 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i8 @llvm.vector.reduce.umin.v1i8(<1 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.umin.v1i8(<1 x i8> undef)
@@ -85,14 +85,14 @@ define i32 @reduce_umin_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umin_i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.umin.v1i16(<1 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i16 @llvm.vector.reduce.umin.v128i16(<128 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i16 @llvm.vector.reduce.umin.v1i16(<1 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i16 @llvm.vector.reduce.umin.v128i16(<128 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.umin.v1i16(<1 x i16> undef)
@@ -115,18 +115,18 @@ define i32 @reduce_umin_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.umin.v64i32(<64 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.umin.v128i32(<128 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i32 @llvm.vector.reduce.umin.v128i32(<128 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umin_i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.umin.v1i32(<1 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i32 @llvm.vector.reduce.umin.v64i32(<64 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i32 @llvm.vector.reduce.umin.v128i32(<128 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i32 @llvm.vector.reduce.umin.v1i32(<1 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i32 @llvm.vector.reduce.umin.v64i32(<64 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i32 @llvm.vector.reduce.umin.v128i32(<128 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.umin.v1i32(<1 x i32> undef)
@@ -148,19 +148,19 @@ define i32 @reduce_umin_i64(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.umin.v32i64(<32 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.umin.v64i64(<64 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.umin.v128i64(<128 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i64 @llvm.vector.reduce.umin.v64i64(<64 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i64 @llvm.vector.reduce.umin.v128i64(<128 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umin_i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i64 @llvm.vector.reduce.umin.v32i64(<32 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i64 @llvm.vector.reduce.umin.v64i64(<64 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V128 = call i64 @llvm.vector.reduce.umin.v128i64(<128 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i64 @llvm.vector.reduce.umin.v32i64(<32 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i64 @llvm.vector.reduce.umin.v64i64(<64 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i64 @llvm.vector.reduce.umin.v128i64(<128 x i64> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
@@ -221,14 +221,14 @@ define i32 @reduce_smin_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smin_i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.smin.v1i8(<1 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i8 @llvm.vector.reduce.smin.v1i8(<1 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.smin.v1i8(<1 x i8> undef)
@@ -255,14 +255,14 @@ define i32 @reduce_smin_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smin_i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.smin.v1i16(<1 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i16 @llvm.vector.reduce.smin.v128i16(<128 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i16 @llvm.vector.reduce.smin.v1i16(<1 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i16 @llvm.vector.reduce.smin.v128i16(<128 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.smin.v1i16(<1 x i16> undef)
@@ -285,18 +285,18 @@ define i32 @reduce_smin_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.smin.v64i32(<64 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.smin.v128i32(<128 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i32 @llvm.vector.reduce.smin.v128i32(<128 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smin_i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.smin.v1i32(<1 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i32 @llvm.vector.reduce.smin.v64i32(<64 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i32 @llvm.vector.reduce.smin.v128i32(<128 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i32 @llvm.vector.reduce.smin.v1i32(<1 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i32 @llvm.vector.reduce.smin.v64i32(<64 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i32 @llvm.vector.reduce.smin.v128i32(<128 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.smin.v1i32(<1 x i32> undef)
@@ -318,19 +318,19 @@ define i32 @reduce_smin_i64(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.smin.v32i64(<32 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.smin.v64i64(<64 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.smin.v128i64(<128 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i64 @llvm.vector.reduce.smin.v64i64(<64 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i64 @llvm.vector.reduce.smin.v128i64(<128 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smin_i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i64 @llvm.vector.reduce.smin.v32i64(<32 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i64 @llvm.vector.reduce.smin.v64i64(<64 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V128 = call i64 @llvm.vector.reduce.smin.v128i64(<128 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i64 @llvm.vector.reduce.smin.v32i64(<32 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i64 @llvm.vector.reduce.smin.v64i64(<64 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i64 @llvm.vector.reduce.smin.v128i64(<128 x i64> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
index fb23b46140d390..d1e48bc4d2a6d8 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
@@ -6,31 +6,31 @@
 
 define i32 @reduce_i1(i32 %arg) {
 ; CHECK-LABEL: 'reduce_i1'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V256 = call i1 @llvm.vector.reduce.or.v256i1(<256 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V512 = call i1 @llvm.vector.reduce.or.v512i1(<512 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V1024 = call i1 @llvm.vector.reduce.or.v1024i1(<1024 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V256 = call i1 @llvm.vector.reduce.or.v256i1(<256 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512 = call i1 @llvm.vector.reduce.or.v512i1(<512 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V1024 = call i1 @llvm.vector.reduce.or.v1024i1(<1024 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i1'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V256 = call i1 @llvm.vector.reduce.or.v256i1(<256 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V512 = call i1 @llvm.vector.reduce.or.v512i1(<512 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V1024 = call i1 @llvm.vector.reduce.or.v1024i1(<1024 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V256 = call i1 @llvm.vector.reduce.or.v256i1(<256 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512 = call i1 @llvm.vector.reduce.or.v512i1(<512 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V1024 = call i1 @llvm.vector.reduce.or.v1024i1(<1024 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef)
@@ -60,14 +60,14 @@ define i32 @reduce_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> undef)
@@ -94,14 +94,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.or.v1i16(<1 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i16 @llvm.vector.reduce.or.v128i16(<128 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i16 @llvm.vector.reduce.or.v1i16(<1 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i16 @llvm.vector.reduce.or.v128i16(<128 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.or.v1i16(<1 x i16> undef)
@@ -124,18 +124,18 @@ define i32 @reduce_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.or.v128i32(<128 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i32 @llvm.vector.reduce.or.v128i32(<128 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.or.v1i32(<1 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i32 @llvm.vector.reduce.or.v128i32(<128 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i32 @llvm.vector.reduce.or.v1i32(<1 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i32 @llvm.vector.reduce.or.v128i32(<128 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.or.v1i32(<1 x i32> undef)
@@ -157,19 +157,19 @@ define i32 @reduce_i64(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.or.v32i64(<32 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.or.v64i64(<64 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.or.v128i64(<128 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i64 @llvm.vector.reduce.or.v64i64(<64 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i64 @llvm.vector.reduce.or.v128i64(<128 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i64 @llvm.vector.reduce.or.v32i64(<32 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i64 @llvm.vector.reduce.or.v64i64(<64 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V128 = call i64 @llvm.vector.reduce.or.v128i64(<128 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i64 @llvm.vector.reduce.or.v32i64(<32 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i64 @llvm.vector.reduce.or.v64i64(<64 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i64 @llvm.vector.reduce.or.v128i64(<128 x i64> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
index 96361512579452..e42dc889f1ba02 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
@@ -12,7 +12,7 @@ define half @vreduce_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv1f16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call reassoc half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
@@ -25,7 +25,7 @@ define half @vreduce_ord_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv1f16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
@@ -40,7 +40,7 @@ define half @vreduce_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv2f16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call reassoc half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
@@ -53,7 +53,7 @@ define half @vreduce_ord_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv2f16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
@@ -68,7 +68,7 @@ define half @vreduce_fadd_nxv4f16(<vscale x 4 x half> %v, half %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv4f16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call reassoc half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
@@ -81,7 +81,7 @@ define half @vreduce_ord_fadd_nxv4f16(<vscale x 4 x half> %v, half %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv4f16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
@@ -96,7 +96,7 @@ define float @vreduce_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv1f32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
@@ -109,7 +109,7 @@ define float @vreduce_ord_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv1f32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
@@ -124,7 +124,7 @@ define float @vreduce_fwadd_nxv1f32(<vscale x 1 x half> %v, float %s) {
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
@@ -140,7 +140,7 @@ define float @vreduce_ord_fwadd_nxv1f32(<vscale x 1 x half> %v, float %s) {
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
@@ -156,7 +156,7 @@ define float @vreduce_fadd_nxv2f32(<vscale x 2 x float> %v, float %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv2f32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
@@ -169,7 +169,7 @@ define float @vreduce_ord_fadd_nxv2f32(<vscale x 2 x float> %v, float %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv2f32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
@@ -184,7 +184,7 @@ define float @vreduce_fwadd_nxv2f32(<vscale x 2 x half> %v, float %s) {
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv2f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
@@ -200,7 +200,7 @@ define float @vreduce_ord_fwadd_nxv2f32(<vscale x 2 x half> %v, float %s) {
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv2f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
@@ -216,7 +216,7 @@ define float @vreduce_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv4f32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
@@ -229,7 +229,7 @@ define float @vreduce_ord_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv4f32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
@@ -244,7 +244,7 @@ define float @vreduce_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv4f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
@@ -260,7 +260,7 @@ define float @vreduce_ord_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv4f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
@@ -276,7 +276,7 @@ define double @vreduce_fadd_nxv1f64(<vscale x 1 x double> %v, double %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv1f64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
@@ -289,7 +289,7 @@ define double @vreduce_ord_fadd_nxv1f64(<vscale x 1 x double> %v, double %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv1f64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
@@ -304,7 +304,7 @@ define double @vreduce_fwadd_nxv1f64(<vscale x 1 x float> %v, double %s) {
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv1f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
@@ -320,7 +320,7 @@ define double @vreduce_ord_fwadd_nxv1f64(<vscale x 1 x float> %v, double %s) {
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv1f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
@@ -336,7 +336,7 @@ define double @vreduce_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv2f64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
@@ -349,7 +349,7 @@ define double @vreduce_ord_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv2f64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
@@ -364,7 +364,7 @@ define double @vreduce_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv2f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
@@ -380,7 +380,7 @@ define double @vreduce_ord_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv2f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
@@ -396,7 +396,7 @@ define double @vreduce_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv4f64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
@@ -409,7 +409,7 @@ define double @vreduce_ord_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv4f64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
@@ -424,7 +424,7 @@ define double @vreduce_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv4f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
@@ -440,7 +440,7 @@ define double @vreduce_ord_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv4f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
@@ -456,7 +456,7 @@ define half @vreduce_fmin_nxv1f16(<vscale x 1 x half> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
@@ -469,7 +469,7 @@ define half @vreduce_fmin_nxv1f16_nonans(<vscale x 1 x half> %v) #0 {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f16_nonans'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
@@ -482,7 +482,7 @@ define half @vreduce_fmin_nxv1f16_nonans_noinfs(<vscale x 1 x half> %v) #1 {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f16_nonans_noinfs'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
@@ -497,7 +497,7 @@ define half @vreduce_fmin_nxv2f16(<vscale x 2 x half> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv2f16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %v)
@@ -512,7 +512,7 @@ define half @vreduce_fmin_nxv4f16(<vscale x 4 x half> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv4f16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %v)
@@ -523,11 +523,11 @@ declare half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half>)
 
 define half @vreduce_fmin_nxv64f16(<vscale x 64 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv64f16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv64f16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
@@ -542,7 +542,7 @@ define float @vreduce_fmin_nxv1f32(<vscale x 1 x float> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
@@ -555,7 +555,7 @@ define float @vreduce_fmin_nxv1f32_nonans(<vscale x 1 x float> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f32_nonans'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
@@ -568,7 +568,7 @@ define float @vreduce_fmin_nxv1f32_nonans_noinfs(<vscale x 1 x float> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f32_nonans_noinfs'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
@@ -583,7 +583,7 @@ define float @vreduce_fmin_nxv2f32(<vscale x 2 x float> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv2f32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %v)
@@ -598,7 +598,7 @@ define float @vreduce_fmin_nxv4f32(<vscale x 4 x float> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv4f32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
@@ -609,11 +609,11 @@ declare float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float>)
 
 define float @vreduce_fmin_nxv32f32(<vscale x 32 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv32f32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv32f32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
@@ -628,7 +628,7 @@ define double @vreduce_fmin_nxv1f64(<vscale x 1 x double> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
@@ -641,7 +641,7 @@ define double @vreduce_fmin_nxv1f64_nonans(<vscale x 1 x double> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f64_nonans'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
@@ -654,7 +654,7 @@ define double @vreduce_fmin_nxv1f64_nonans_noinfs(<vscale x 1 x double> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f64_nonans_noinfs'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
@@ -669,7 +669,7 @@ define double @vreduce_fmin_nxv2f64(<vscale x 2 x double> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv2f64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %v)
@@ -684,7 +684,7 @@ define double @vreduce_fmin_nxv4f64(<vscale x 4 x double> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv4f64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
@@ -695,11 +695,11 @@ declare double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double>)
 
 define double @vreduce_fmin_nxv16f64(<vscale x 16 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv16f64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv16f64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
@@ -714,7 +714,7 @@ define half @vreduce_fmax_nxv1f16(<vscale x 1 x half> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
@@ -727,7 +727,7 @@ define half @vreduce_fmax_nxv1f16_nonans(<vscale x 1 x half> %v) #0 {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f16_nonans'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
@@ -740,7 +740,7 @@ define half @vreduce_fmax_nxv1f16_nonans_noinfs(<vscale x 1 x half> %v) #1 {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f16_nonans_noinfs'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
@@ -755,7 +755,7 @@ define half @vreduce_fmax_nxv2f16(<vscale x 2 x half> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv2f16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %v)
@@ -770,7 +770,7 @@ define half @vreduce_fmax_nxv4f16(<vscale x 4 x half> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv4f16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %v)
@@ -781,11 +781,11 @@ declare half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half>)
 
 define half @vreduce_fmax_nxv64f16(<vscale x 64 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv64f16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv64f16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
@@ -800,7 +800,7 @@ define float @vreduce_fmax_nxv1f32(<vscale x 1 x float> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
@@ -813,7 +813,7 @@ define float @vreduce_fmax_nxv1f32_nonans(<vscale x 1 x float> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f32_nonans'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
@@ -826,7 +826,7 @@ define float @vreduce_fmax_nxv1f32_nonans_noinfs(<vscale x 1 x float> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f32_nonans_noinfs'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
@@ -841,7 +841,7 @@ define float @vreduce_fmax_nxv2f32(<vscale x 2 x float> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv2f32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %v)
@@ -856,7 +856,7 @@ define float @vreduce_fmax_nxv4f32(<vscale x 4 x float> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv4f32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
@@ -867,11 +867,11 @@ declare float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float>)
 
 define float @vreduce_fmax_nxv32f32(<vscale x 32 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv32f32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv32f32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
@@ -886,7 +886,7 @@ define double @vreduce_fmax_nxv1f64(<vscale x 1 x double> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
@@ -899,7 +899,7 @@ define double @vreduce_fmax_nxv1f64_nonans(<vscale x 1 x double> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f64_nonans'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
@@ -912,7 +912,7 @@ define double @vreduce_fmax_nxv1f64_nonans_noinfs(<vscale x 1 x double> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f64_nonans_noinfs'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
@@ -927,7 +927,7 @@ define double @vreduce_fmax_nxv2f64(<vscale x 2 x double> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv2f64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %v)
@@ -942,7 +942,7 @@ define double @vreduce_fmax_nxv4f64(<vscale x 4 x double> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv4f64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
@@ -953,11 +953,11 @@ declare double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double>)
 
 define double @vreduce_fmax_nxv16f64(<vscale x 16 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv16f64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv16f64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)
@@ -970,7 +970,7 @@ define float @vreduce_nsz_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_nsz_fadd_nxv1f32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll
index ca5b9774160e8c..2ffaf2dd22dcec 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll
@@ -12,7 +12,7 @@ define signext i8 @vreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv1i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
@@ -27,7 +27,7 @@ define signext i8 @vreduce_umax_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv1i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8> %v)
@@ -42,7 +42,7 @@ define signext i8 @vreduce_smax_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv1i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> %v)
@@ -57,7 +57,7 @@ define signext i8 @vreduce_umin_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv1i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8> %v)
@@ -72,7 +72,7 @@ define signext i8 @vreduce_smin_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv1i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8> %v)
@@ -87,7 +87,7 @@ define signext i8 @vreduce_and_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv1i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8> %v)
@@ -102,7 +102,7 @@ define signext i8 @vreduce_or_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv1i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> %v)
@@ -117,7 +117,7 @@ define signext i8 @vreduce_xor_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv1i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8> %v)
@@ -132,7 +132,7 @@ define signext i8 @vreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv2i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %v)
@@ -147,7 +147,7 @@ define signext i8 @vreduce_umax_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv2i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8> %v)
@@ -162,7 +162,7 @@ define signext i8 @vreduce_smax_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv2i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> %v)
@@ -177,7 +177,7 @@ define signext i8 @vreduce_umin_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv2i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8> %v)
@@ -192,7 +192,7 @@ define signext i8 @vreduce_smin_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv2i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8> %v)
@@ -207,7 +207,7 @@ define signext i8 @vreduce_and_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv2i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8> %v)
@@ -222,7 +222,7 @@ define signext i8 @vreduce_or_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv2i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> %v)
@@ -237,7 +237,7 @@ define signext i8 @vreduce_xor_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv2i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> %v)
@@ -252,7 +252,7 @@ define signext i8 @vreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv4i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %v)
@@ -267,7 +267,7 @@ define signext i8 @vreduce_umax_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv4i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8> %v)
@@ -282,7 +282,7 @@ define signext i8 @vreduce_smax_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv4i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8> %v)
@@ -297,7 +297,7 @@ define signext i8 @vreduce_umin_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv4i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8> %v)
@@ -312,7 +312,7 @@ define signext i8 @vreduce_smin_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv4i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %v)
@@ -327,7 +327,7 @@ define signext i8 @vreduce_and_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv4i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
@@ -342,7 +342,7 @@ define signext i8 @vreduce_or_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv4i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8> %v)
@@ -357,7 +357,7 @@ define signext i8 @vreduce_xor_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv4i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8> %v)
@@ -372,7 +372,7 @@ define signext i16 @vreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv1i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %v)
@@ -387,7 +387,7 @@ define signext i16 @vwreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
@@ -403,7 +403,7 @@ define signext i16 @vwreduce_uadd_nxv1i8(<vscale x 1 x i8> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
@@ -419,7 +419,7 @@ define signext i16 @vreduce_umax_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv1i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16> %v)
@@ -434,7 +434,7 @@ define signext i16 @vreduce_smax_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv1i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16> %v)
@@ -449,7 +449,7 @@ define signext i16 @vreduce_umin_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv1i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16> %v)
@@ -464,7 +464,7 @@ define signext i16 @vreduce_smin_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv1i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16> %v)
@@ -479,7 +479,7 @@ define signext i16 @vreduce_and_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv1i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16> %v)
@@ -494,7 +494,7 @@ define signext i16 @vreduce_or_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv1i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16> %v)
@@ -509,7 +509,7 @@ define signext i16 @vreduce_xor_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv1i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16> %v)
@@ -524,7 +524,7 @@ define signext i16 @vreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv2i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %v)
@@ -539,7 +539,7 @@ define signext i16 @vwreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
@@ -555,7 +555,7 @@ define signext i16 @vwreduce_uadd_nxv2i8(<vscale x 2 x i8> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
@@ -571,7 +571,7 @@ define signext i16 @vreduce_umax_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv2i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16> %v)
@@ -586,7 +586,7 @@ define signext i16 @vreduce_smax_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv2i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16> %v)
@@ -601,7 +601,7 @@ define signext i16 @vreduce_umin_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv2i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16> %v)
@@ -616,7 +616,7 @@ define signext i16 @vreduce_smin_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv2i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16> %v)
@@ -631,7 +631,7 @@ define signext i16 @vreduce_and_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv2i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16> %v)
@@ -646,7 +646,7 @@ define signext i16 @vreduce_or_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv2i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> %v)
@@ -661,7 +661,7 @@ define signext i16 @vreduce_xor_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv2i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %v)
@@ -676,7 +676,7 @@ define signext i16 @vreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv4i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %v)
@@ -691,7 +691,7 @@ define signext i16 @vwreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
@@ -707,7 +707,7 @@ define signext i16 @vwreduce_uadd_nxv4i8(<vscale x 4 x i8> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
@@ -723,7 +723,7 @@ define signext i16 @vreduce_umax_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv4i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16> %v)
@@ -738,7 +738,7 @@ define signext i16 @vreduce_smax_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv4i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16> %v)
@@ -753,7 +753,7 @@ define signext i16 @vreduce_umin_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv4i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16> %v)
@@ -768,7 +768,7 @@ define signext i16 @vreduce_smin_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv4i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16> %v)
@@ -783,7 +783,7 @@ define signext i16 @vreduce_and_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv4i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16> %v)
@@ -798,7 +798,7 @@ define signext i16 @vreduce_or_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv4i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16> %v)
@@ -813,7 +813,7 @@ define signext i16 @vreduce_xor_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv4i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16> %v)
@@ -828,7 +828,7 @@ define signext i32 @vreduce_add_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv1i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %v)
@@ -843,7 +843,7 @@ define signext i32 @vwreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i16> %v to <vscale x 1 x i32>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = sext <vscale x 1 x i16> %v to <vscale x 1 x i32>
@@ -859,7 +859,7 @@ define signext i32 @vwreduce_uadd_nxv1i16(<vscale x 1 x i16> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 1 x i16> %v to <vscale x 1 x i32>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = zext <vscale x 1 x i16> %v to <vscale x 1 x i32>
@@ -875,7 +875,7 @@ define signext i32 @vreduce_umax_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv1i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32> %v)
@@ -890,7 +890,7 @@ define signext i32 @vreduce_smax_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv1i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32> %v)
@@ -905,7 +905,7 @@ define signext i32 @vreduce_umin_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv1i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32> %v)
@@ -920,7 +920,7 @@ define signext i32 @vreduce_smin_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv1i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32> %v)
@@ -935,7 +935,7 @@ define signext i32 @vreduce_and_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv1i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> %v)
@@ -950,7 +950,7 @@ define signext i32 @vreduce_or_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv1i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> %v)
@@ -965,7 +965,7 @@ define signext i32 @vreduce_xor_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv1i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> %v)
@@ -980,7 +980,7 @@ define signext i32 @vreduce_add_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv2i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %v)
@@ -995,7 +995,7 @@ define signext i32 @vwreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i16> %v to <vscale x 2 x i32>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = sext <vscale x 2 x i16> %v to <vscale x 2 x i32>
@@ -1011,7 +1011,7 @@ define signext i32 @vwreduce_uadd_nxv2i16(<vscale x 2 x i16> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 2 x i16> %v to <vscale x 2 x i32>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = zext <vscale x 2 x i16> %v to <vscale x 2 x i32>
@@ -1027,7 +1027,7 @@ define signext i32 @vreduce_umax_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv2i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32> %v)
@@ -1042,7 +1042,7 @@ define signext i32 @vreduce_smax_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv2i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32> %v)
@@ -1057,7 +1057,7 @@ define signext i32 @vreduce_umin_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv2i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %v)
@@ -1072,7 +1072,7 @@ define signext i32 @vreduce_smin_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv2i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32> %v)
@@ -1087,7 +1087,7 @@ define signext i32 @vreduce_and_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv2i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32> %v)
@@ -1102,7 +1102,7 @@ define signext i32 @vreduce_or_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv2i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %v)
@@ -1117,7 +1117,7 @@ define signext i32 @vreduce_xor_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv2i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32> %v)
@@ -1132,7 +1132,7 @@ define signext i32 @vreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv4i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
@@ -1147,7 +1147,7 @@ define signext i32 @vwreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i16> %v to <vscale x 4 x i32>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = sext <vscale x 4 x i16> %v to <vscale x 4 x i32>
@@ -1163,7 +1163,7 @@ define signext i32 @vwreduce_uadd_nxv4i16(<vscale x 4 x i16> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 4 x i16> %v to <vscale x 4 x i32>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = zext <vscale x 4 x i16> %v to <vscale x 4 x i32>
@@ -1179,7 +1179,7 @@ define signext i32 @vreduce_umax_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv4i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
@@ -1194,7 +1194,7 @@ define signext i32 @vreduce_smax_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv4i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
@@ -1209,7 +1209,7 @@ define signext i32 @vreduce_umin_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv4i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
@@ -1224,7 +1224,7 @@ define signext i32 @vreduce_smin_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv4i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
@@ -1239,7 +1239,7 @@ define signext i32 @vreduce_and_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv4i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
@@ -1254,7 +1254,7 @@ define signext i32 @vreduce_or_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv4i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
@@ -1269,7 +1269,7 @@ define signext i32 @vreduce_xor_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv4i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
@@ -1284,7 +1284,7 @@ define i64 @vreduce_add_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv1i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %v)
@@ -1299,7 +1299,7 @@ define i64 @vwreduce_add_nxv1i32(<vscale x 1 x i32> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i32> %v to <vscale x 1 x i64>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = sext <vscale x 1 x i32> %v to <vscale x 1 x i64>
@@ -1315,7 +1315,7 @@ define i64 @vwreduce_uadd_nxv1i32(<vscale x 1 x i32> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 1 x i32> %v to <vscale x 1 x i64>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = zext <vscale x 1 x i32> %v to <vscale x 1 x i64>
@@ -1331,7 +1331,7 @@ define i64 @vreduce_umax_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv1i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> %v)
@@ -1346,7 +1346,7 @@ define i64 @vreduce_smax_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv1i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> %v)
@@ -1361,7 +1361,7 @@ define i64 @vreduce_umin_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv1i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> %v)
@@ -1376,7 +1376,7 @@ define i64 @vreduce_smin_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv1i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> %v)
@@ -1391,7 +1391,7 @@ define i64 @vreduce_and_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv1i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64> %v)
@@ -1406,7 +1406,7 @@ define i64 @vreduce_or_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv1i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64> %v)
@@ -1421,7 +1421,7 @@ define i64 @vreduce_xor_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv1i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64> %v)
@@ -1436,7 +1436,7 @@ define i64 @vreduce_add_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv2i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %v)
@@ -1451,7 +1451,7 @@ define i64 @vwreduce_add_nxv2i32(<vscale x 2 x i32> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
@@ -1467,7 +1467,7 @@ define i64 @vwreduce_uadd_nxv2i32(<vscale x 2 x i32> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
@@ -1483,7 +1483,7 @@ define i64 @vreduce_umax_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv2i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %v)
@@ -1498,7 +1498,7 @@ define i64 @vreduce_smax_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv2i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %v)
@@ -1513,7 +1513,7 @@ define i64 @vreduce_umin_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv2i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %v)
@@ -1528,7 +1528,7 @@ define i64 @vreduce_smin_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv2i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %v)
@@ -1543,7 +1543,7 @@ define i64 @vreduce_and_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv2i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %v)
@@ -1558,7 +1558,7 @@ define i64 @vreduce_or_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv2i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %v)
@@ -1573,7 +1573,7 @@ define i64 @vreduce_xor_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv2i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %v)
@@ -1588,7 +1588,7 @@ define i64 @vreduce_add_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv4i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
@@ -1603,7 +1603,7 @@ define i64 @vwreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i32> %v to <vscale x 4 x i64>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = sext <vscale x 4 x i32> %v to <vscale x 4 x i64>
@@ -1619,7 +1619,7 @@ define i64 @vwreduce_uadd_nxv4i32(<vscale x 4 x i32> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 4 x i32> %v to <vscale x 4 x i64>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = zext <vscale x 4 x i32> %v to <vscale x 4 x i64>
@@ -1635,7 +1635,7 @@ define i64 @vreduce_umax_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv4i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v)
@@ -1650,7 +1650,7 @@ define i64 @vreduce_smax_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv4i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v)
@@ -1665,7 +1665,7 @@ define i64 @vreduce_umin_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv4i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v)
@@ -1680,7 +1680,7 @@ define i64 @vreduce_smin_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv4i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v)
@@ -1695,7 +1695,7 @@ define i64 @vreduce_and_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv4i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
@@ -1710,7 +1710,7 @@ define i64 @vreduce_or_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv4i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
@@ -1725,7 +1725,7 @@ define i64 @vreduce_xor_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv4i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll
index b79cdb87a06a05..bf657eeea107f5 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll
@@ -6,25 +6,25 @@
 
 define i32 @reduce_i1(i32 %arg) {
 ; CHECK-LABEL: 'reduce_i1'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i1'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
@@ -51,14 +51,14 @@ define i32 @reduce_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.xor.v1i8(<1 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i8 @llvm.vector.reduce.xor.v1i8(<1 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.xor.v1i8(<1 x i8> undef)
@@ -85,14 +85,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.xor.v1i16(<1 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i16 @llvm.vector.reduce.xor.v128i16(<128 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i16 @llvm.vector.reduce.xor.v1i16(<1 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i16 @llvm.vector.reduce.xor.v128i16(<128 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.xor.v1i16(<1 x i16> undef)
@@ -115,18 +115,18 @@ define i32 @reduce_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.xor.v64i32(<64 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.xor.v128i32(<128 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i32 @llvm.vector.reduce.xor.v128i32(<128 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.xor.v1i32(<1 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i32 @llvm.vector.reduce.xor.v64i32(<64 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i32 @llvm.vector.reduce.xor.v128i32(<128 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i32 @llvm.vector.reduce.xor.v1i32(<1 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i32 @llvm.vector.reduce.xor.v64i32(<64 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i32 @llvm.vector.reduce.xor.v128i32(<128 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.xor.v1i32(<1 x i32> undef)
@@ -148,19 +148,19 @@ define i32 @reduce_i64(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.xor.v32i64(<32 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.xor.v64i64(<64 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.xor.v128i64(<128 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i64 @llvm.vector.reduce.xor.v64i64(<64 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i64 @llvm.vector.reduce.xor.v128i64(<128 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i64 @llvm.vector.reduce.xor.v32i64(<32 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i64 @llvm.vector.reduce.xor.v64i64(<64 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V128 = call i64 @llvm.vector.reduce.xor.v128i64(<128 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i64 @llvm.vector.reduce.xor.v32i64(<32 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i64 @llvm.vector.reduce.xor.v64i64(<64 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i64 @llvm.vector.reduce.xor.v128i64(<128 x i64> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll
index 43a03404e8db65..b9eacda3046d9c 100644
--- a/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll
@@ -522,8 +522,8 @@ define void @reduce_add() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %28 = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %29 = call i64 @llvm.vp.reduce.add.nxv8i64(i64 undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %30 = call i64 @llvm.vector.reduce.add.nxv8i64(<vscale x 8 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %31 = call i64 @llvm.vp.reduce.add.nxv16i64(i64 undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %32 = call i64 @llvm.vector.reduce.add.nxv16i64(<vscale x 16 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %31 = call i64 @llvm.vp.reduce.add.nxv16i64(i64 undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %32 = call i64 @llvm.vector.reduce.add.nxv16i64(<vscale x 16 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
@@ -593,8 +593,8 @@ define void @reduce_fadd() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %28 = call double @llvm.vector.reduce.fadd.nxv4f64(double undef, <vscale x 4 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %29 = call double @llvm.vp.reduce.fadd.nxv8f64(double undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %30 = call double @llvm.vector.reduce.fadd.nxv8f64(double undef, <vscale x 8 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %31 = call double @llvm.vp.reduce.fadd.nxv16f64(double undef, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %32 = call double @llvm.vector.reduce.fadd.nxv16f64(double undef, <vscale x 16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %31 = call double @llvm.vp.reduce.fadd.nxv16f64(double undef, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %32 = call double @llvm.vector.reduce.fadd.nxv16f64(double undef, <vscale x 16 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call float @llvm.vp.reduce.fadd.v2f32(float undef, <2 x float> undef, <2 x i1> undef, i32 undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll
index bd9f6af89a5cd9..601e0ffa446e11 100644
--- a/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll
@@ -14,7 +14,7 @@ define void  @vector_broadcast() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = shufflevector <vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = shufflevector <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = shufflevector <vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %8 = shufflevector <vscale x 16 x i1> undef, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %8 = shufflevector <vscale x 16 x i1> undef, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %9 = shufflevector <vscale x 8 x i1> undef, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %10 = shufflevector <vscale x 4 x i1> undef, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %11 = shufflevector <vscale x 2 x i1> undef, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
@@ -29,7 +29,7 @@ define void  @vector_broadcast() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = shufflevector <vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = shufflevector <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = shufflevector <vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %8 = shufflevector <vscale x 16 x i1> undef, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %8 = shufflevector <vscale x 16 x i1> undef, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %9 = shufflevector <vscale x 8 x i1> undef, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %10 = shufflevector <vscale x 4 x i1> undef, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %11 = shufflevector <vscale x 2 x i1> undef, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
@@ -78,20 +78,20 @@ declare <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x
 
 define void @vector_reverse() {
 ; CHECK-LABEL: 'vector_reverse'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %reverse_nxv16i8 = call <vscale x 16 x i8> @llvm.experimental.vector.reverse.nxv16i8(<vscale x 16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %reverse_nxv32i8 = call <vscale x 32 x i8> @llvm.experimental.vector.reverse.nxv32i8(<vscale x 32 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %reverse_nxv16i8 = call <vscale x 16 x i8> @llvm.experimental.vector.reverse.nxv16i8(<vscale x 16 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %reverse_nxv32i8 = call <vscale x 32 x i8> @llvm.experimental.vector.reverse.nxv32i8(<vscale x 32 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %reverse_nxv2i16 = call <vscale x 2 x i16> @llvm.experimental.vector.reverse.nxv2i16(<vscale x 2 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %reverse_nxv4i16 = call <vscale x 4 x i16> @llvm.experimental.vector.reverse.nxv4i16(<vscale x 4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %reverse_nxv8i16 = call <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %reverse_nxv16i16 = call <vscale x 16 x i16> @llvm.experimental.vector.reverse.nxv16i16(<vscale x 16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %reverse_nxv4i32 = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %reverse_nxv8i32 = call <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %reverse_nxv2i64 = call <vscale x 2 x i64> @llvm.experimental.vector.reverse.nxv2i64(<vscale x 2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %reverse_nxv4i64 = call <vscale x 4 x i64> @llvm.experimental.vector.reverse.nxv4i64(<vscale x 4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %reverse_nxv8i64 = call <vscale x 8 x i64> @llvm.experimental.vector.reverse.nxv8i64(<vscale x 8 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 138 for instruction: %reverse_nxv16i64 = call <vscale x 16 x i64> @llvm.experimental.vector.reverse.nxv16i64(<vscale x 16 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 276 for instruction: %reverse_nxv32i64 = call <vscale x 32 x i64> @llvm.experimental.vector.reverse.nxv32i64(<vscale x 32 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %reverse_nxv16i1 = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %reverse_nxv8i16 = call <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %reverse_nxv16i16 = call <vscale x 16 x i16> @llvm.experimental.vector.reverse.nxv16i16(<vscale x 16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %reverse_nxv4i32 = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %reverse_nxv8i32 = call <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %reverse_nxv2i64 = call <vscale x 2 x i64> @llvm.experimental.vector.reverse.nxv2i64(<vscale x 2 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %reverse_nxv4i64 = call <vscale x 4 x i64> @llvm.experimental.vector.reverse.nxv4i64(<vscale x 4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 83 for instruction: %reverse_nxv8i64 = call <vscale x 8 x i64> @llvm.experimental.vector.reverse.nxv8i64(<vscale x 8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 166 for instruction: %reverse_nxv16i64 = call <vscale x 16 x i64> @llvm.experimental.vector.reverse.nxv16i64(<vscale x 16 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 332 for instruction: %reverse_nxv32i64 = call <vscale x 32 x i64> @llvm.experimental.vector.reverse.nxv32i64(<vscale x 32 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %reverse_nxv16i1 = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %reverse_nxv8i1 = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %reverse_nxv4i1 = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %reverse_nxv2i1 = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-broadcast.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-broadcast.ll
index 432b90d9305af3..46bf3152ac5bd3 100644
--- a/llvm/test/Analysis/CostModel/RISCV/shuffle-broadcast.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-broadcast.ll
@@ -45,9 +45,9 @@ define void  @broadcast_scalable() #0{
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %38 = shufflevector <vscale x 2 x i1> undef, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %39 = shufflevector <vscale x 4 x i1> undef, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %40 = shufflevector <vscale x 8 x i1> undef, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %41 = shufflevector <vscale x 16 x i1> undef, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %42 = shufflevector <vscale x 32 x i1> undef, <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %43 = shufflevector <vscale x 64 x i1> undef, <vscale x 64 x i1> undef, <vscale x 64 x i32> zeroinitializer
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %41 = shufflevector <vscale x 16 x i1> undef, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %42 = shufflevector <vscale x 32 x i1> undef, <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %43 = shufflevector <vscale x 64 x i1> undef, <vscale x 64 x i1> undef, <vscale x 64 x i32> zeroinitializer
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'broadcast_scalable'
@@ -92,9 +92,9 @@ define void  @broadcast_scalable() #0{
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %38 = shufflevector <vscale x 2 x i1> undef, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %39 = shufflevector <vscale x 4 x i1> undef, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %40 = shufflevector <vscale x 8 x i1> undef, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %41 = shufflevector <vscale x 16 x i1> undef, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %42 = shufflevector <vscale x 32 x i1> undef, <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %43 = shufflevector <vscale x 64 x i1> undef, <vscale x 64 x i1> undef, <vscale x 64 x i32> zeroinitializer
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %41 = shufflevector <vscale x 16 x i1> undef, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %42 = shufflevector <vscale x 32 x i1> undef, <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %43 = shufflevector <vscale x 64 x i1> undef, <vscale x 64 x i1> undef, <vscale x 64 x i32> zeroinitializer
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %zero = shufflevector <vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x i32> zeroinitializer
diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-insert_subvector.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-insert_subvector.ll
index af656b4d7976d2..9a333dc8b8ddda 100644
--- a/llvm/test/Analysis/CostModel/RISCV/shuffle-insert_subvector.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-insert_subvector.ll
@@ -9,13 +9,13 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
@@ -23,13 +23,13 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
@@ -53,13 +53,13 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
@@ -67,13 +67,13 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
@@ -102,13 +102,13 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -116,11 +116,11 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
@@ -133,13 +133,13 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -147,11 +147,11 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
@@ -199,13 +199,13 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256,
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -213,11 +213,11 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256,
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
@@ -230,13 +230,13 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256,
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -244,11 +244,11 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256,
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
@@ -306,7 +306,7 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -314,11 +314,11 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
@@ -341,7 +341,7 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -349,11 +349,11 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 12, i32 13, i32 14, i32 15>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 14, i32 15>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-reverse.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-reverse.ll
index 17deeb2cfafa62..90e301905abd15 100644
--- a/llvm/test/Analysis/CostModel/RISCV/shuffle-reverse.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-reverse.ll
@@ -16,21 +16,21 @@ define void @reverse() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> <i32 1, i32 0>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reverse'
diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-select.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-select.ll
index 81454cc826e143..0fd1cfc908d97a 100644
--- a/llvm/test/Analysis/CostModel/RISCV/shuffle-select.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-select.ll
@@ -32,11 +32,11 @@ define <8 x i8> @select_non_contiguous_v8i8(<8 x i8> %v, <8 x i8> %w) {
 
 define <8 x i64> @select_start_v8i64(<8 x i64> %v, <8 x i64> %w) {
 ; CHECK-LABEL: 'select_start_v8i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %res = shufflevector <8 x i64> %v, <8 x i64> %w, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res = shufflevector <8 x i64> %v, <8 x i64> %w, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %res
 ;
 ; CHECK-SIZE-LABEL: 'select_start_v8i64'
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res = shufflevector <8 x i64> %v, <8 x i64> %w, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %res = shufflevector <8 x i64> %v, <8 x i64> %w, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %res
 ;
   %res = shufflevector <8 x i64> %v, <8 x i64> %w, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
@@ -45,11 +45,11 @@ define <8 x i64> @select_start_v8i64(<8 x i64> %v, <8 x i64> %w) {
 
 define <8 x i64> @select_non_contiguous_v8i64(<8 x i64> %v, <8 x i64> %w) {
 ; CHECK-LABEL: 'select_non_contiguous_v8i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %res = shufflevector <8 x i64> %v, <8 x i64> %w, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 4, i32 13, i32 6, i32 15>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res = shufflevector <8 x i64> %v, <8 x i64> %w, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 4, i32 13, i32 6, i32 15>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %res
 ;
 ; CHECK-SIZE-LABEL: 'select_non_contiguous_v8i64'
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res = shufflevector <8 x i64> %v, <8 x i64> %w, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 4, i32 13, i32 6, i32 15>
+; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %res = shufflevector <8 x i64> %v, <8 x i64> %w, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 4, i32 13, i32 6, i32 15>
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %res
 ;
   %res = shufflevector <8 x i64> %v, <8 x i64> %w, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 4, i32 13, i32 6, i32 15>
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
index 1bcd7a2e009e0b..31c6eaeea0aec4 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
@@ -36,10 +36,10 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %i.0 = add nsw i32 %i.0.in8, -1
 ; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %idxprom = zext i32 %i.0 to i64
 ; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx = getelementptr inbounds i32, ptr %B, i64 %idxprom
-; CHECK-NEXT:  LV: Found an estimated cost of 11 for VF vscale x 4 For instruction: %1 = load i32, ptr %arrayidx, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 13 for VF vscale x 4 For instruction: %1 = load i32, ptr %arrayidx, align 4
 ; CHECK-NEXT:  LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %add9 = add i32 %1, 1
 ; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 %idxprom
-; CHECK-NEXT:  LV: Found an estimated cost of 11 for VF vscale x 4 For instruction: store i32 %add9, ptr %arrayidx3, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 13 for VF vscale x 4 For instruction: store i32 %add9, ptr %arrayidx3, align 4
 ; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1
 ; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
 ; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0
@@ -86,10 +86,10 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %i.0 = add nsw i32 %i.0.in8, -1
 ; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %idxprom = zext i32 %i.0 to i64
 ; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx = getelementptr inbounds i32, ptr %B, i64 %idxprom
-; CHECK-NEXT:  LV: Found an estimated cost of 11 for VF vscale x 4 For instruction: %1 = load i32, ptr %arrayidx, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 13 for VF vscale x 4 For instruction: %1 = load i32, ptr %arrayidx, align 4
 ; CHECK-NEXT:  LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %add9 = add i32 %1, 1
 ; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 %idxprom
-; CHECK-NEXT:  LV: Found an estimated cost of 11 for VF vscale x 4 For instruction: store i32 %add9, ptr %arrayidx3, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 13 for VF vscale x 4 For instruction: store i32 %add9, ptr %arrayidx3, align 4
 ; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1
 ; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
 ; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0
@@ -112,7 +112,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
 ; CHECK-NEXT:  LV: The target has 31 registers of RISCV::GPRRC register class
 ; CHECK-NEXT:  LV: The target has 32 registers of RISCV::VRRC register class
-; CHECK-NEXT:  LV: Loop cost is 28
+; CHECK-NEXT:  LV: Loop cost is 32
 ; CHECK-NEXT:  LV: IC is 1
 ; CHECK-NEXT:  LV: VF is vscale x 4
 ; CHECK-NEXT:  LV: Not Interleaving.
@@ -176,10 +176,10 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %i.0 = add nsw i32 %i.0.in8, -1
 ; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %idxprom = zext i32 %i.0 to i64
 ; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx = getelementptr inbounds float, ptr %B, i64 %idxprom
-; CHECK-NEXT:  LV: Found an estimated cost of 11 for VF vscale x 4 For instruction: %1 = load float, ptr %arrayidx, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 13 for VF vscale x 4 For instruction: %1 = load float, ptr %arrayidx, align 4
 ; CHECK-NEXT:  LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %conv1 = fadd float %1, 1.000000e+00
 ; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx3 = getelementptr inbounds float, ptr %A, i64 %idxprom
-; CHECK-NEXT:  LV: Found an estimated cost of 11 for VF vscale x 4 For instruction: store float %conv1, ptr %arrayidx3, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 13 for VF vscale x 4 For instruction: store float %conv1, ptr %arrayidx3, align 4
 ; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1
 ; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
 ; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0
@@ -226,10 +226,10 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %i.0 = add nsw i32 %i.0.in8, -1
 ; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %idxprom = zext i32 %i.0 to i64
 ; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx = getelementptr inbounds float, ptr %B, i64 %idxprom
-; CHECK-NEXT:  LV: Found an estimated cost of 11 for VF vscale x 4 For instruction: %1 = load float, ptr %arrayidx, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 13 for VF vscale x 4 For instruction: %1 = load float, ptr %arrayidx, align 4
 ; CHECK-NEXT:  LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %conv1 = fadd float %1, 1.000000e+00
 ; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx3 = getelementptr inbounds float, ptr %A, i64 %idxprom
-; CHECK-NEXT:  LV: Found an estimated cost of 11 for VF vscale x 4 For instruction: store float %conv1, ptr %arrayidx3, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 13 for VF vscale x 4 For instruction: store float %conv1, ptr %arrayidx3, align 4
 ; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1
 ; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
 ; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0
@@ -252,7 +252,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
 ; CHECK-NEXT:  LV: The target has 31 registers of RISCV::GPRRC register class
 ; CHECK-NEXT:  LV: The target has 32 registers of RISCV::VRRC register class
-; CHECK-NEXT:  LV: Loop cost is 28
+; CHECK-NEXT:  LV: Loop cost is 32
 ; CHECK-NEXT:  LV: IC is 1
 ; CHECK-NEXT:  LV: VF is vscale x 4
 ; CHECK-NEXT:  LV: Not Interleaving.