[llvm] [RISCV][CostModel] Refine Arithmetic reduction costs (PR #79103)

Mon Jan 22 22:54:29 PST 2024

https://github.com/arcbbb created https://github.com/llvm/llvm-project/pull/79103

This patch is split off from #77342

- Correct for CodeSize cost that 1 instruction is not included. 3 is from {VMV.S, ReductionOp, VMV.X}
- Add SplitCost
 Unordered reduction chain a series of VADD/VFADD/... which scales with LMUL.
 Ordered reductions chain a series of VFREDOSUMs.
- Use MVT to estimate VL.

>From d17dd7e3e03fceeb1a1c19349fe35de0bd63393a Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Mon, 22 Jan 2024 21:43:45 -0800
Subject: [PATCH] [RISCV][CostModel] Refine Arithmetic reduction costs

- Correct cost for CodeSize
- Add SplitCost
  Unordered reduction chain a series of VADD/VFADD/...
  Ordered reductions chain a series of VFREDOSUMs.
- Use MVT to estimate VL
---
 .../Target/RISCV/RISCVTargetTransformInfo.cpp |  78 +++++++++--
 .../Analysis/CostModel/RISCV/reduce-add.ll    |  70 +++++-----
 .../Analysis/CostModel/RISCV/reduce-and.ll    |  70 +++++-----
 .../Analysis/CostModel/RISCV/reduce-fadd.ll   | 108 +++++++-------
 .../Analysis/CostModel/RISCV/reduce-or.ll     |  70 +++++-----
 .../CostModel/RISCV/reduce-scalable-fp.ll     |  62 ++++----
 .../CostModel/RISCV/reduce-scalable-int.ll    | 132 +++++++++---------
 .../Analysis/CostModel/RISCV/reduce-xor.ll    |  70 +++++-----
 .../CostModel/RISCV/rvv-intrinsics.ll         |   8 +-
 9 files changed, 361 insertions(+), 307 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 866d5cf340e68b4..0799af0a7bad550 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -86,6 +86,10 @@ RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
     }
     case RISCV::VMV_X_S:
     case RISCV::VMV_S_X:
+    case RISCV::VFMV_F_S:
+    case RISCV::VFMV_S_F:
+    case RISCV::VMNAND_MM:
+    case RISCV::VCPOP_M:
       Cost += 1;
       break;
     default:
@@ -966,20 +970,70 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
     return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
 
   std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
-  if (Ty->getElementType()->isIntegerTy(1))
-    // vcpop sequences, see vreduction-mask.ll
-    return (LT.first - 1) + (ISD == ISD::AND ? 3 : 2);
+  SmallVector<unsigned, 3> Opcodes;
+  Type *ElementTy = Ty->getElementType();
+  if (ElementTy->isIntegerTy(1)) {
+    if (ISD == ISD::AND) {
+      // Example sequences:
+      //   vsetvli a0, zero, e8, mf8, ta, ma
+      //   vmnot.m v8, v0
+      //   vcpop.m a0, v8
+      //   seqz a0, a0
+      Opcodes = {RISCV::VMNAND_MM, RISCV::VCPOP_M};
+      return (LT.first - 1) +
+             getRISCVInstructionCost(Opcodes, LT.second, CostKind) +
+             getCmpSelInstrCost(Instruction::ICmp, ElementTy, ElementTy,
+                                CmpInst::ICMP_EQ, CostKind);
+    } else {
+      // Example sequences:
+      //   vsetvli a0, zero, e8, mf8, ta, ma
+      //   vcpop.m a0, v0
+      //   snez a0, a0
+      Opcodes = {RISCV::VCPOP_M};
+      return (LT.first - 1) +
+             getRISCVInstructionCost(Opcodes, LT.second, CostKind) +
+             getCmpSelInstrCost(Instruction::ICmp, ElementTy, ElementTy,
+                                CmpInst::ICMP_NE, CostKind);
+    }
+  }
 
   // IR Reduction is composed by two vmv and one rvv reduction instruction.
-  InstructionCost BaseCost = 2;
-
-  if (CostKind == TTI::TCK_CodeSize)
-    return (LT.first - 1) + BaseCost;
-
-  unsigned VL = getEstimatedVLFor(Ty);
-  if (TTI::requiresOrderedReduction(FMF))
-    return (LT.first - 1) + BaseCost + VL;
-  return (LT.first - 1) + BaseCost + Log2_32_Ceil(VL);
+  if (TTI::requiresOrderedReduction(FMF)) {
+    Opcodes.push_back(RISCV::VFMV_S_F);
+    for (unsigned i = 0; i < LT.first.getValue(); i++)
+      Opcodes.push_back(RISCV::VFREDOSUM_VS);
+    Opcodes.push_back(RISCV::VFMV_F_S);
+    return getRISCVInstructionCost(Opcodes, LT.second, CostKind);
+  }
+  unsigned SplitOp;
+  switch (ISD) {
+  case ISD::ADD:
+    SplitOp = RISCV::VADD_VV;
+    Opcodes = {RISCV::VMV_S_X, RISCV::VREDSUM_VS, RISCV::VMV_X_S};
+    break;
+  case ISD::OR:
+    SplitOp = RISCV::VOR_VV;
+    Opcodes = {RISCV::VMV_S_X, RISCV::VREDOR_VS, RISCV::VMV_X_S};
+    break;
+  case ISD::XOR:
+    SplitOp = RISCV::VXOR_VV;
+    Opcodes = {RISCV::VMV_S_X, RISCV::VREDXOR_VS, RISCV::VMV_X_S};
+    break;
+  case ISD::AND:
+    SplitOp = RISCV::VAND_VV;
+    Opcodes = {RISCV::VMV_S_X, RISCV::VREDAND_VS, RISCV::VMV_X_S};
+    break;
+  case ISD::FADD:
+    SplitOp = RISCV::VFADD_VV;
+    Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDUSUM_VS, RISCV::VFMV_F_S};
+    break;
+  }
+  // Add a cost for data larger than LMUL8
+  InstructionCost SplitCost =
+      (LT.first > 1) ? (LT.first - 1) *
+                           getRISCVInstructionCost(SplitOp, LT.second, CostKind)
+                     : 0;
+  return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
 }
 
 InstructionCost RISCVTTIImpl::getExtendedReductionCost(
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
index 6fe098628ea078e..95c0f71249bb8c4 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
@@ -51,14 +51,14 @@ define i32 @reduce_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
@@ -85,14 +85,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
@@ -115,18 +115,18 @@ define i32 @reduce_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef)
@@ -148,19 +148,19 @@ define i32 @reduce_i64(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
index 3e2344bb41aa3e2..4a4394e9f0f0daa 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
@@ -60,14 +60,14 @@ define i32 @reduce_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> undef)
@@ -94,14 +94,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.and.v1i16(<1 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i16 @llvm.vector.reduce.and.v128i16(<128 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i16 @llvm.vector.reduce.and.v1i16(<1 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i16 @llvm.vector.reduce.and.v128i16(<128 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.and.v1i16(<1 x i16> undef)
@@ -124,18 +124,18 @@ define i32 @reduce_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.and.v64i32(<64 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.and.v128i32(<128 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i32 @llvm.vector.reduce.and.v128i32(<128 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.and.v1i32(<1 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i32 @llvm.vector.reduce.and.v64i32(<64 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i32 @llvm.vector.reduce.and.v128i32(<128 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i32 @llvm.vector.reduce.and.v1i32(<1 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i32 @llvm.vector.reduce.and.v64i32(<64 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i32 @llvm.vector.reduce.and.v128i32(<128 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.and.v1i32(<1 x i32> undef)
@@ -157,19 +157,19 @@ define i32 @reduce_i64(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.and.v32i64(<32 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.and.v64i64(<64 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.and.v128i64(<128 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i64 @llvm.vector.reduce.and.v64i64(<64 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i64 @llvm.vector.reduce.and.v128i64(<128 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i64 @llvm.vector.reduce.and.v32i64(<32 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i64 @llvm.vector.reduce.and.v64i64(<64 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V128 = call i64 @llvm.vector.reduce.and.v128i64(<128 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i64 @llvm.vector.reduce.and.v32i64(<32 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i64 @llvm.vector.reduce.and.v64i64(<64 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i64 @llvm.vector.reduce.and.v128i64(<128 x i64> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
index e8ef6e922043117..3fd71af896c95f1 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
@@ -15,14 +15,14 @@ define void @reduce_fadd_half() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fadd_half'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call fast half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call fast half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef)
@@ -45,18 +45,18 @@ define void @reduce_fadd_float() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v32 = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call fast float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fadd_float'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call fast float @llvm.vector.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32 = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call fast float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call fast float @llvm.vector.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32 = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call fast float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef)
@@ -78,19 +78,19 @@ define void @reduce_fadd_double() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call fast double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call fast double @llvm.vector.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v32 = call fast double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128 = call fast double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call fast double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fadd_double'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call fast double @llvm.vector.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call fast double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call fast double @llvm.vector.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32 = call fast double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V128 = call fast double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call fast double @llvm.vector.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call fast double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call fast double @llvm.vector.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32 = call fast double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call fast double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef)
@@ -117,14 +117,14 @@ define void @reduce_oredered_fadd_half() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_oredered_fadd_half'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef)
@@ -147,18 +147,18 @@ define void @reduce_oredered_fadd_float() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16 = call float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v32 = call float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64 = call float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 131 for instruction: %V128 = call float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V128 = call float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_oredered_fadd_float'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call float @llvm.vector.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32 = call float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call float @llvm.vector.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32 = call float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef)
@@ -180,19 +180,19 @@ define void @reduce_oredered_fadd_double() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8 = call double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16 = call double @llvm.vector.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v32 = call double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %V64 = call double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 133 for instruction: %V128 = call double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64 = call double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V128 = call double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_oredered_fadd_double'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call double @llvm.vector.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call double @llvm.vector.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32 = call double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V128 = call double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call double @llvm.vector.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call double @llvm.vector.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32 = call double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
index fb23b46140d3909..b8921c0ffa909a0 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
@@ -60,14 +60,14 @@ define i32 @reduce_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> undef)
@@ -94,14 +94,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.or.v1i16(<1 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i16 @llvm.vector.reduce.or.v128i16(<128 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i16 @llvm.vector.reduce.or.v1i16(<1 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i16 @llvm.vector.reduce.or.v128i16(<128 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.or.v1i16(<1 x i16> undef)
@@ -124,18 +124,18 @@ define i32 @reduce_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.or.v128i32(<128 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i32 @llvm.vector.reduce.or.v128i32(<128 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.or.v1i32(<1 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i32 @llvm.vector.reduce.or.v128i32(<128 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i32 @llvm.vector.reduce.or.v1i32(<1 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i32 @llvm.vector.reduce.or.v128i32(<128 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.or.v1i32(<1 x i32> undef)
@@ -157,19 +157,19 @@ define i32 @reduce_i64(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.or.v32i64(<32 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.or.v64i64(<64 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.or.v128i64(<128 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i64 @llvm.vector.reduce.or.v64i64(<64 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i64 @llvm.vector.reduce.or.v128i64(<128 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i64 @llvm.vector.reduce.or.v32i64(<32 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i64 @llvm.vector.reduce.or.v64i64(<64 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V128 = call i64 @llvm.vector.reduce.or.v128i64(<128 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i64 @llvm.vector.reduce.or.v32i64(<32 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i64 @llvm.vector.reduce.or.v64i64(<64 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i64 @llvm.vector.reduce.or.v128i64(<128 x i64> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
index 963615125794524..0dd3e3cf53e1cad 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
@@ -12,7 +12,7 @@ define half @vreduce_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv1f16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call reassoc half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
@@ -25,7 +25,7 @@ define half @vreduce_ord_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv1f16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
@@ -40,7 +40,7 @@ define half @vreduce_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv2f16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call reassoc half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
@@ -53,7 +53,7 @@ define half @vreduce_ord_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv2f16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
@@ -68,7 +68,7 @@ define half @vreduce_fadd_nxv4f16(<vscale x 4 x half> %v, half %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv4f16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call reassoc half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
@@ -81,7 +81,7 @@ define half @vreduce_ord_fadd_nxv4f16(<vscale x 4 x half> %v, half %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv4f16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
@@ -96,7 +96,7 @@ define float @vreduce_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv1f32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
@@ -109,7 +109,7 @@ define float @vreduce_ord_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv1f32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
@@ -124,7 +124,7 @@ define float @vreduce_fwadd_nxv1f32(<vscale x 1 x half> %v, float %s) {
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
@@ -140,7 +140,7 @@ define float @vreduce_ord_fwadd_nxv1f32(<vscale x 1 x half> %v, float %s) {
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
@@ -156,7 +156,7 @@ define float @vreduce_fadd_nxv2f32(<vscale x 2 x float> %v, float %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv2f32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
@@ -169,7 +169,7 @@ define float @vreduce_ord_fadd_nxv2f32(<vscale x 2 x float> %v, float %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv2f32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
@@ -184,7 +184,7 @@ define float @vreduce_fwadd_nxv2f32(<vscale x 2 x half> %v, float %s) {
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv2f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
@@ -200,7 +200,7 @@ define float @vreduce_ord_fwadd_nxv2f32(<vscale x 2 x half> %v, float %s) {
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv2f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
@@ -216,7 +216,7 @@ define float @vreduce_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv4f32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
@@ -229,7 +229,7 @@ define float @vreduce_ord_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv4f32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
@@ -244,7 +244,7 @@ define float @vreduce_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv4f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
@@ -260,7 +260,7 @@ define float @vreduce_ord_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv4f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
@@ -276,7 +276,7 @@ define double @vreduce_fadd_nxv1f64(<vscale x 1 x double> %v, double %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv1f64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
@@ -289,7 +289,7 @@ define double @vreduce_ord_fadd_nxv1f64(<vscale x 1 x double> %v, double %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv1f64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
@@ -304,7 +304,7 @@ define double @vreduce_fwadd_nxv1f64(<vscale x 1 x float> %v, double %s) {
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv1f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
@@ -320,7 +320,7 @@ define double @vreduce_ord_fwadd_nxv1f64(<vscale x 1 x float> %v, double %s) {
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv1f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
@@ -336,7 +336,7 @@ define double @vreduce_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv2f64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
@@ -349,7 +349,7 @@ define double @vreduce_ord_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv2f64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
@@ -364,7 +364,7 @@ define double @vreduce_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv2f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
@@ -380,7 +380,7 @@ define double @vreduce_ord_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv2f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
@@ -396,7 +396,7 @@ define double @vreduce_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv4f64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
@@ -409,7 +409,7 @@ define double @vreduce_ord_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv4f64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
@@ -424,7 +424,7 @@ define double @vreduce_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv4f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
@@ -440,7 +440,7 @@ define double @vreduce_ord_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv4f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
@@ -970,7 +970,7 @@ define float @vreduce_nsz_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_nsz_fadd_nxv1f32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll
index ca5b9774160e8c9..80efe912c86985e 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll
@@ -12,7 +12,7 @@ define signext i8 @vreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv1i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
@@ -87,7 +87,7 @@ define signext i8 @vreduce_and_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv1i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8> %v)
@@ -102,7 +102,7 @@ define signext i8 @vreduce_or_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv1i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> %v)
@@ -117,7 +117,7 @@ define signext i8 @vreduce_xor_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv1i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8> %v)
@@ -132,7 +132,7 @@ define signext i8 @vreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv2i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %v)
@@ -207,7 +207,7 @@ define signext i8 @vreduce_and_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv2i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8> %v)
@@ -222,7 +222,7 @@ define signext i8 @vreduce_or_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv2i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> %v)
@@ -237,7 +237,7 @@ define signext i8 @vreduce_xor_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv2i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> %v)
@@ -252,7 +252,7 @@ define signext i8 @vreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv4i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %v)
@@ -327,7 +327,7 @@ define signext i8 @vreduce_and_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv4i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
@@ -342,7 +342,7 @@ define signext i8 @vreduce_or_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv4i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8> %v)
@@ -357,7 +357,7 @@ define signext i8 @vreduce_xor_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv4i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8> %v)
@@ -372,7 +372,7 @@ define signext i16 @vreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv1i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %v)
@@ -387,7 +387,7 @@ define signext i16 @vwreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
@@ -403,7 +403,7 @@ define signext i16 @vwreduce_uadd_nxv1i8(<vscale x 1 x i8> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
@@ -479,7 +479,7 @@ define signext i16 @vreduce_and_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv1i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16> %v)
@@ -494,7 +494,7 @@ define signext i16 @vreduce_or_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv1i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16> %v)
@@ -509,7 +509,7 @@ define signext i16 @vreduce_xor_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv1i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16> %v)
@@ -524,7 +524,7 @@ define signext i16 @vreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv2i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %v)
@@ -539,7 +539,7 @@ define signext i16 @vwreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
@@ -555,7 +555,7 @@ define signext i16 @vwreduce_uadd_nxv2i8(<vscale x 2 x i8> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
@@ -631,7 +631,7 @@ define signext i16 @vreduce_and_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv2i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16> %v)
@@ -646,7 +646,7 @@ define signext i16 @vreduce_or_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv2i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> %v)
@@ -661,7 +661,7 @@ define signext i16 @vreduce_xor_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv2i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %v)
@@ -676,7 +676,7 @@ define signext i16 @vreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv4i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %v)
@@ -691,7 +691,7 @@ define signext i16 @vwreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
@@ -707,7 +707,7 @@ define signext i16 @vwreduce_uadd_nxv4i8(<vscale x 4 x i8> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
@@ -783,7 +783,7 @@ define signext i16 @vreduce_and_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv4i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16> %v)
@@ -798,7 +798,7 @@ define signext i16 @vreduce_or_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv4i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16> %v)
@@ -813,7 +813,7 @@ define signext i16 @vreduce_xor_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv4i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16> %v)
@@ -828,7 +828,7 @@ define signext i32 @vreduce_add_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv1i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %v)
@@ -843,7 +843,7 @@ define signext i32 @vwreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i16> %v to <vscale x 1 x i32>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = sext <vscale x 1 x i16> %v to <vscale x 1 x i32>
@@ -859,7 +859,7 @@ define signext i32 @vwreduce_uadd_nxv1i16(<vscale x 1 x i16> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 1 x i16> %v to <vscale x 1 x i32>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = zext <vscale x 1 x i16> %v to <vscale x 1 x i32>
@@ -935,7 +935,7 @@ define signext i32 @vreduce_and_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv1i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> %v)
@@ -950,7 +950,7 @@ define signext i32 @vreduce_or_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv1i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> %v)
@@ -965,7 +965,7 @@ define signext i32 @vreduce_xor_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv1i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> %v)
@@ -980,7 +980,7 @@ define signext i32 @vreduce_add_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv2i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %v)
@@ -995,7 +995,7 @@ define signext i32 @vwreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i16> %v to <vscale x 2 x i32>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = sext <vscale x 2 x i16> %v to <vscale x 2 x i32>
@@ -1011,7 +1011,7 @@ define signext i32 @vwreduce_uadd_nxv2i16(<vscale x 2 x i16> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 2 x i16> %v to <vscale x 2 x i32>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = zext <vscale x 2 x i16> %v to <vscale x 2 x i32>
@@ -1087,7 +1087,7 @@ define signext i32 @vreduce_and_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv2i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32> %v)
@@ -1102,7 +1102,7 @@ define signext i32 @vreduce_or_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv2i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %v)
@@ -1117,7 +1117,7 @@ define signext i32 @vreduce_xor_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv2i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32> %v)
@@ -1132,7 +1132,7 @@ define signext i32 @vreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv4i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
@@ -1147,7 +1147,7 @@ define signext i32 @vwreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i16> %v to <vscale x 4 x i32>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = sext <vscale x 4 x i16> %v to <vscale x 4 x i32>
@@ -1163,7 +1163,7 @@ define signext i32 @vwreduce_uadd_nxv4i16(<vscale x 4 x i16> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 4 x i16> %v to <vscale x 4 x i32>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = zext <vscale x 4 x i16> %v to <vscale x 4 x i32>
@@ -1239,7 +1239,7 @@ define signext i32 @vreduce_and_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv4i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
@@ -1254,7 +1254,7 @@ define signext i32 @vreduce_or_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv4i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
@@ -1269,7 +1269,7 @@ define signext i32 @vreduce_xor_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv4i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
@@ -1284,7 +1284,7 @@ define i64 @vreduce_add_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv1i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %v)
@@ -1299,7 +1299,7 @@ define i64 @vwreduce_add_nxv1i32(<vscale x 1 x i32> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i32> %v to <vscale x 1 x i64>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = sext <vscale x 1 x i32> %v to <vscale x 1 x i64>
@@ -1315,7 +1315,7 @@ define i64 @vwreduce_uadd_nxv1i32(<vscale x 1 x i32> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 1 x i32> %v to <vscale x 1 x i64>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = zext <vscale x 1 x i32> %v to <vscale x 1 x i64>
@@ -1391,7 +1391,7 @@ define i64 @vreduce_and_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv1i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64> %v)
@@ -1406,7 +1406,7 @@ define i64 @vreduce_or_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv1i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64> %v)
@@ -1421,7 +1421,7 @@ define i64 @vreduce_xor_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv1i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64> %v)
@@ -1436,7 +1436,7 @@ define i64 @vreduce_add_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv2i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %v)
@@ -1451,7 +1451,7 @@ define i64 @vwreduce_add_nxv2i32(<vscale x 2 x i32> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
@@ -1467,7 +1467,7 @@ define i64 @vwreduce_uadd_nxv2i32(<vscale x 2 x i32> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
@@ -1543,7 +1543,7 @@ define i64 @vreduce_and_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv2i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %v)
@@ -1558,7 +1558,7 @@ define i64 @vreduce_or_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv2i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %v)
@@ -1573,7 +1573,7 @@ define i64 @vreduce_xor_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv2i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %v)
@@ -1588,7 +1588,7 @@ define i64 @vreduce_add_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv4i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
@@ -1603,7 +1603,7 @@ define i64 @vwreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i32> %v to <vscale x 4 x i64>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = sext <vscale x 4 x i32> %v to <vscale x 4 x i64>
@@ -1619,7 +1619,7 @@ define i64 @vwreduce_uadd_nxv4i32(<vscale x 4 x i32> %v) {
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 4 x i32> %v to <vscale x 4 x i64>
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = zext <vscale x 4 x i32> %v to <vscale x 4 x i64>
@@ -1695,7 +1695,7 @@ define i64 @vreduce_and_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv4i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
@@ -1710,7 +1710,7 @@ define i64 @vreduce_or_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv4i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
@@ -1725,7 +1725,7 @@ define i64 @vreduce_xor_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv4i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll
index b79cdb87a06a05d..728045f6797b96d 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll
@@ -51,14 +51,14 @@ define i32 @reduce_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i8'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.xor.v1i8(<1 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i8 @llvm.vector.reduce.xor.v1i8(<1 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.xor.v1i8(<1 x i8> undef)
@@ -85,14 +85,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i16'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.xor.v1i16(<1 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i16 @llvm.vector.reduce.xor.v128i16(<128 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i16 @llvm.vector.reduce.xor.v1i16(<1 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i16 @llvm.vector.reduce.xor.v128i16(<128 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.xor.v1i16(<1 x i16> undef)
@@ -115,18 +115,18 @@ define i32 @reduce_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i32 @llvm.vector.reduce.xor.v64i32(<64 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i32 @llvm.vector.reduce.xor.v128i32(<128 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i32 @llvm.vector.reduce.xor.v128i32(<128 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i32'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.xor.v1i32(<1 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i32 @llvm.vector.reduce.xor.v64i32(<64 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i32 @llvm.vector.reduce.xor.v128i32(<128 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i32 @llvm.vector.reduce.xor.v1i32(<1 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i32 @llvm.vector.reduce.xor.v64i32(<64 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i32 @llvm.vector.reduce.xor.v128i32(<128 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.xor.v1i32(<1 x i32> undef)
@@ -148,19 +148,19 @@ define i32 @reduce_i64(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i64 @llvm.vector.reduce.xor.v32i64(<32 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i64 @llvm.vector.reduce.xor.v64i64(<64 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128 = call i64 @llvm.vector.reduce.xor.v128i64(<128 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i64 @llvm.vector.reduce.xor.v64i64(<64 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i64 @llvm.vector.reduce.xor.v128i64(<128 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i64 @llvm.vector.reduce.xor.v32i64(<32 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i64 @llvm.vector.reduce.xor.v64i64(<64 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V128 = call i64 @llvm.vector.reduce.xor.v128i64(<128 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i64 @llvm.vector.reduce.xor.v32i64(<32 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i64 @llvm.vector.reduce.xor.v64i64(<64 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i64 @llvm.vector.reduce.xor.v128i64(<128 x i64> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll
index 43a03404e8db659..2fc0b45808f772e 100644
--- a/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll
@@ -522,8 +522,8 @@ define void @reduce_add() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %28 = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %29 = call i64 @llvm.vp.reduce.add.nxv8i64(i64 undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %30 = call i64 @llvm.vector.reduce.add.nxv8i64(<vscale x 8 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %31 = call i64 @llvm.vp.reduce.add.nxv16i64(i64 undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %32 = call i64 @llvm.vector.reduce.add.nxv16i64(<vscale x 16 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %31 = call i64 @llvm.vp.reduce.add.nxv16i64(i64 undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %32 = call i64 @llvm.vector.reduce.add.nxv16i64(<vscale x 16 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
@@ -593,8 +593,8 @@ define void @reduce_fadd() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %28 = call double @llvm.vector.reduce.fadd.nxv4f64(double undef, <vscale x 4 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %29 = call double @llvm.vp.reduce.fadd.nxv8f64(double undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %30 = call double @llvm.vector.reduce.fadd.nxv8f64(double undef, <vscale x 8 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %31 = call double @llvm.vp.reduce.fadd.nxv16f64(double undef, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %32 = call double @llvm.vector.reduce.fadd.nxv16f64(double undef, <vscale x 16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %31 = call double @llvm.vp.reduce.fadd.nxv16f64(double undef, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %32 = call double @llvm.vector.reduce.fadd.nxv16f64(double undef, <vscale x 16 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call float @llvm.vp.reduce.fadd.v2f32(float undef, <2 x float> undef, <2 x i1> undef, i32 undef)