[llvm] [RISCV][TTI] Implement instruction cost for vp.reduce.* (PR #114184)

Tue Oct 29 22:19:05 PDT 2024

https://github.com/ElvisWang123 created https://github.com/llvm/llvm-project/pull/114184

This patch is based on #114180.

>From d6b3e4527c79074511ec39fc9140159ce6399723 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Tue, 29 Oct 2024 20:47:58 -0700
Subject: [PATCH 1/4] [RISCV] Only calculate ordered reduction with fp type.
 NFC

---
 llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 395baa5f1aab99..bfc3f5b29a2aaa 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1552,7 +1552,7 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
   }
 
   // IR Reduction is composed by two vmv and one rvv reduction instruction.
-  if (TTI::requiresOrderedReduction(FMF)) {
+  if (TTI::requiresOrderedReduction(FMF) && ElementTy->isFloatingPointTy()) {
     Opcodes.push_back(RISCV::VFMV_S_F);
     for (unsigned i = 0; i < LT.first.getValue(); i++)
       Opcodes.push_back(RISCV::VFREDOSUM_VS);

>From 7a68fe50f5be19996bb142de82a8d7c883c05582 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Tue, 29 Oct 2024 18:49:05 -0700
Subject: [PATCH 2/4] Precommit testcases.

---
 .../Analysis/CostModel/RISCV/reduce-add.ll    | 141 +++++--
 .../Analysis/CostModel/RISCV/reduce-and.ll    | 147 ++++++--
 .../Analysis/CostModel/RISCV/reduce-fadd.ll   | 214 ++++++++++-
 .../CostModel/RISCV/reduce-fmaximum.ll        | 182 ++++++++-
 .../CostModel/RISCV/reduce-fminimum.ll        |  98 ++++-
 .../Analysis/CostModel/RISCV/reduce-fmul.ll   | 214 ++++++++++-
 .../Analysis/CostModel/RISCV/reduce-max.ll    | 275 +++++++++++---
 .../Analysis/CostModel/RISCV/reduce-min.ll    | 272 +++++++++++---
 .../Analysis/CostModel/RISCV/reduce-or.ll     | 150 ++++++--
 .../CostModel/RISCV/reduce-scalable-fp.ll     | 211 ++++++++++-
 .../CostModel/RISCV/reduce-scalable-int.ll    | 352 +++++++++++++++++-
 .../Analysis/CostModel/RISCV/reduce-xor.ll    | 141 +++++--
 12 files changed, 2172 insertions(+), 225 deletions(-)

diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
index 6032ae01aa718b..1edcdecb923e57 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
@@ -1,8 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
 
 define i32 @reduce_i1(i32 %arg) {
 ; CHECK-LABEL: 'reduce_i1'
@@ -14,6 +16,14 @@ define i32 @reduce_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.add.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.add.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.add.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.add.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.add.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.add.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.add.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.add.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i1'
@@ -25,6 +35,14 @@ define i32 @reduce_i1(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.add.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.add.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.add.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.add.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.add.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.add.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.add.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.add.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> undef)
@@ -35,6 +53,15 @@ define i32 @reduce_i1(i32 %arg) {
   %V32  = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
   %V64  = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
   %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
+
+  %V1_vp   = call i1 @llvm.vp.reduce.add.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i1 @llvm.vp.reduce.add.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i1 @llvm.vp.reduce.add.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i1 @llvm.vp.reduce.add.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i1 @llvm.vp.reduce.add.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i1 @llvm.vp.reduce.add.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i1 @llvm.vp.reduce.add.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i1 @llvm.vp.reduce.add.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -48,6 +75,14 @@ define i32 @reduce_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.add.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.add.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.add.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.add.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i8'
@@ -59,6 +94,14 @@ define i32 @reduce_i8(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.add.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.add.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.add.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.add.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
@@ -69,6 +112,15 @@ define i32 @reduce_i8(i32 %arg) {
   %V32  = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
   %V64  = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
   %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
+
+  %V1_vp   = call i8 @llvm.vp.reduce.add.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i8 @llvm.vp.reduce.add.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i8 @llvm.vp.reduce.add.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i8 @llvm.vp.reduce.add.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -82,6 +134,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.add.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.add.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.add.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.add.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.add.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.add.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.add.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.add.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i16'
@@ -93,6 +153,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.add.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.add.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.add.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.add.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.add.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.add.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.add.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.add.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
@@ -103,6 +171,15 @@ define i32 @reduce_i16(i32 %arg) {
   %V32  = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
   %V64  = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
   %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
+
+  %V1_vp   = call i16 @llvm.vp.reduce.add.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i16 @llvm.vp.reduce.add.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i16 @llvm.vp.reduce.add.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i16 @llvm.vp.reduce.add.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i16 @llvm.vp.reduce.add.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i16 @llvm.vp.reduce.add.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i16 @llvm.vp.reduce.add.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i16 @llvm.vp.reduce.add.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -116,6 +193,14 @@ define i32 @reduce_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.add.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.add.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.add.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.add.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.add.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.add.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.add.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.add.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i32'
@@ -127,6 +212,14 @@ define i32 @reduce_i32(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.add.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.add.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.add.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.add.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.add.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.add.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.add.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.add.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef)
@@ -137,32 +230,19 @@ define i32 @reduce_i32(i32 %arg) {
   %V32  = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
   %V64  = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
   %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
+
+  %V1_vp   = call i32 @llvm.vp.reduce.add.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i32 @llvm.vp.reduce.add.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i32 @llvm.vp.reduce.add.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i32 @llvm.vp.reduce.add.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i32 @llvm.vp.reduce.add.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i32 @llvm.vp.reduce.add.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i32 @llvm.vp.reduce.add.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i32 @llvm.vp.reduce.add.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
 define i32 @reduce_i64(i32 %arg) {
-; CHECK-LABEL: 'reduce_i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SIZE-LABEL: 'reduce_i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
   %V1   = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
   %V2   = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
   %V4   = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
@@ -171,6 +251,15 @@ define i32 @reduce_i64(i32 %arg) {
   %V32  = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
   %V64  = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
   %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef)
+
+  %V1_vp   = call i64 @llvm.vp.reduce.add.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i64 @llvm.vp.reduce.add.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i64 @llvm.vp.reduce.add.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i64 @llvm.vp.reduce.add.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i64 @llvm.vp.reduce.add.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i64 @llvm.vp.reduce.add.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i64 @llvm.vp.reduce.add.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i64 @llvm.vp.reduce.add.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
index a8eb4e9a280dd0..f72298b8060631 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
@@ -1,8 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
 
 define i32 @reduce_i1(i32 %arg) {
 ; CHECK-LABEL: 'reduce_i1'
@@ -17,6 +19,17 @@ define i32 @reduce_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.and.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.and.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.and.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.and.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.and.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.and.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.and.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.and.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14076 for instruction: %V256_vp = call i1 @llvm.vp.reduce.and.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 46584 for instruction: %V512_vp = call i1 @llvm.vp.reduce.and.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 166896 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.and.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i1'
@@ -31,6 +44,17 @@ define i32 @reduce_i1(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.and.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.and.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.and.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.and.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.and.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.and.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.and.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.and.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3324 for instruction: %V256_vp = call i1 @llvm.vp.reduce.and.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10744 for instruction: %V512_vp = call i1 @llvm.vp.reduce.and.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 37872 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.and.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
@@ -44,6 +68,18 @@ define i32 @reduce_i1(i32 %arg) {
   %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
   %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
   %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
+
+  %V1_vp   = call i1 @llvm.vp.reduce.and.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i1 @llvm.vp.reduce.and.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i1 @llvm.vp.reduce.and.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i1 @llvm.vp.reduce.and.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i1 @llvm.vp.reduce.and.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i1 @llvm.vp.reduce.and.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i1 @llvm.vp.reduce.and.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i1 @llvm.vp.reduce.and.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+  %V256_vp = call i1 @llvm.vp.reduce.and.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
+  %V512_vp = call i1 @llvm.vp.reduce.and.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
+  %V1024_vp = call i1 @llvm.vp.reduce.and.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -57,6 +93,13 @@ define i32 @reduce_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.and.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.and.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.and.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.and.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.and.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.and.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.and.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i8'
@@ -68,6 +111,13 @@ define i32 @reduce_i8(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.and.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.and.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.and.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.and.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.and.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.and.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.and.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> undef)
@@ -78,6 +128,14 @@ define i32 @reduce_i8(i32 %arg) {
   %V32  = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
   %V64  = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef)
   %V128 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> undef)
+
+  %V1_vp   = call i8 @llvm.vp.reduce.and.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i8 @llvm.vp.reduce.and.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i8 @llvm.vp.reduce.and.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i8 @llvm.vp.reduce.and.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i8 @llvm.vp.reduce.and.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i8 @llvm.vp.reduce.and.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i8 @llvm.vp.reduce.and.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -91,6 +149,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.and.v128i16(<128 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.and.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.and.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.and.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.and.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.and.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.and.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.and.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.and.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i16'
@@ -102,6 +168,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.and.v128i16(<128 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.and.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.and.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.and.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.and.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.and.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.and.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.and.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.and.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.and.v1i16(<1 x i16> undef)
@@ -112,6 +186,15 @@ define i32 @reduce_i16(i32 %arg) {
   %V32  = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> undef)
   %V64  = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> undef)
   %V128 = call i16 @llvm.vector.reduce.and.v128i16(<128 x i16> undef)
+
+  %V1_vp   = call i16 @llvm.vp.reduce.and.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i16 @llvm.vp.reduce.and.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i16 @llvm.vp.reduce.and.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i16 @llvm.vp.reduce.and.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i16 @llvm.vp.reduce.and.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i16 @llvm.vp.reduce.and.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i16 @llvm.vp.reduce.and.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i16 @llvm.vp.reduce.and.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -125,6 +208,14 @@ define i32 @reduce_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.and.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.and.v128i32(<128 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.and.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.and.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.and.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.and.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.and.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.and.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.and.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.and.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i32'
@@ -136,6 +227,14 @@ define i32 @reduce_i32(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.and.v64i32(<64 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.and.v128i32(<128 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.and.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.and.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.and.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.and.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.and.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.and.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.and.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.and.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.and.v1i32(<1 x i32> undef)
@@ -146,32 +245,19 @@ define i32 @reduce_i32(i32 %arg) {
   %V32  = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> undef)
   %V64  = call i32 @llvm.vector.reduce.and.v64i32(<64 x i32> undef)
   %V128 = call i32 @llvm.vector.reduce.and.v128i32(<128 x i32> undef)
+
+  %V1_vp   = call i32 @llvm.vp.reduce.and.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i32 @llvm.vp.reduce.and.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i32 @llvm.vp.reduce.and.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i32 @llvm.vp.reduce.and.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i32 @llvm.vp.reduce.and.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i32 @llvm.vp.reduce.and.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i32 @llvm.vp.reduce.and.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i32 @llvm.vp.reduce.and.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
 define i32 @reduce_i64(i32 %arg) {
-; CHECK-LABEL: 'reduce_i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.and.v32i64(<32 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.and.v64i64(<64 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.and.v128i64(<128 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SIZE-LABEL: 'reduce_i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.and.v32i64(<32 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.and.v64i64(<64 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.and.v128i64(<128 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
   %V1   = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
   %V2   = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
   %V4   = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
@@ -180,6 +266,15 @@ define i32 @reduce_i64(i32 %arg) {
   %V32  = call i64 @llvm.vector.reduce.and.v32i64(<32 x i64> undef)
   %V64  = call i64 @llvm.vector.reduce.and.v64i64(<64 x i64> undef)
   %V128 = call i64 @llvm.vector.reduce.and.v128i64(<128 x i64> undef)
+
+  %V1_vp   = call i64 @llvm.vp.reduce.and.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i64 @llvm.vp.reduce.and.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i64 @llvm.vp.reduce.and.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i64 @llvm.vp.reduce.and.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i64 @llvm.vp.reduce.and.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i64 @llvm.vp.reduce.and.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i64 @llvm.vp.reduce.and.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i64 @llvm.vp.reduce.and.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
index 588d852d7f26e2..4ab1553afda093 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFH
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFHMIN
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s  --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFH
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFHMIN
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s  --check-prefix=SIZE
 
 define void @reduce_fadd_bfloat() {
 ; FP-REDUCE-LABEL: 'reduce_fadd_bfloat'
@@ -13,6 +13,14 @@ define void @reduce_fadd_bfloat() {
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %v32 = call fast bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fadd_bfloat'
@@ -24,6 +32,14 @@ define void @reduce_fadd_bfloat() {
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %v32 = call fast bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0.0, <1 x bfloat> undef)
@@ -34,6 +50,15 @@ define void @reduce_fadd_bfloat() {
   %v32 = call fast bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0.0, <32 x bfloat> undef)
   %V64 = call fast bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0.0, <64 x bfloat> undef)
   %V128 = call fast bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0.0, <128 x bfloat> undef)
+
+  %V1_vp = call fast bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0.0, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+  %V2_vp = call fast bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0.0, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+  %V4_vp = call fast bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0.0, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+  %V8_vp = call fast bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0.0, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+  %V16_vp = call fast bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0.0, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+  %v32_vp = call fast bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0.0, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+  %V64_vp = call fast bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0.0, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call fast bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0.0, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
   ret void
 }
 
@@ -47,6 +72,14 @@ define void @reduce_fadd_half() {
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; FP-REDUCE-ZVFHMIN-LABEL: 'reduce_fadd_half'
@@ -58,6 +91,14 @@ define void @reduce_fadd_half() {
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fadd_half'
@@ -69,6 +110,14 @@ define void @reduce_fadd_half() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef)
@@ -79,6 +128,15 @@ define void @reduce_fadd_half() {
   %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0.0, <32 x half> undef)
   %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0.0, <64 x half> undef)
   %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0.0, <128 x half> undef)
+
+  %V1_vp = call fast half @llvm.vp.reduce.fadd.v1f16(half 0.0, <1 x half> undef, <1 x i1> undef, i32 undef)
+  %V2_vp = call fast half @llvm.vp.reduce.fadd.v2f16(half 0.0, <2 x half> undef, <2 x i1> undef, i32 undef)
+  %V4_vp = call fast half @llvm.vp.reduce.fadd.v4f16(half 0.0, <4 x half> undef, <4 x i1> undef, i32 undef)
+  %V8_vp = call fast half @llvm.vp.reduce.fadd.v8f16(half 0.0, <8 x half> undef, <8 x i1> undef, i32 undef)
+  %V16_vp = call fast half @llvm.vp.reduce.fadd.v16f16(half 0.0, <16 x half> undef, <16 x i1> undef, i32 undef)
+  %v32_vp = call fast half @llvm.vp.reduce.fadd.v32f16(half 0.0, <32 x half> undef, <32 x i1> undef, i32 undef)
+  %V64_vp = call fast half @llvm.vp.reduce.fadd.v64f16(half 0.0, <64 x half> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call fast half @llvm.vp.reduce.fadd.v128f16(half 0.0, <128 x half> undef, <128 x i1> undef, i32 undef)
   ret void
 }
 
@@ -92,6 +150,14 @@ define void @reduce_fadd_float() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v32 = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call fast float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fadd_float'
@@ -103,6 +169,14 @@ define void @reduce_fadd_float() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32 = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call fast float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef)
@@ -113,6 +187,15 @@ define void @reduce_fadd_float() {
   %v32 = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.0, <32 x float> undef)
   %V64 = call fast float @llvm.vector.reduce.fadd.v64f32(float 0.0, <64 x float> undef)
   %V128 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.0, <128 x float> undef)
+
+  %V1_vp = call fast float @llvm.vp.reduce.fadd.v1f32(float 0.0, <1 x float> undef, <1 x i1> undef, i32 undef)
+  %V2_vp = call fast float @llvm.vp.reduce.fadd.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef)
+  %V4_vp = call fast float @llvm.vp.reduce.fadd.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef)
+  %V8_vp = call fast float @llvm.vp.reduce.fadd.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef)
+  %V16_vp = call fast float @llvm.vp.reduce.fadd.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef)
+  %v32_vp = call fast float @llvm.vp.reduce.fadd.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef)
+  %V64_vp = call fast float @llvm.vp.reduce.fadd.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call fast float @llvm.vp.reduce.fadd.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef)
   ret void
 }
 
@@ -126,6 +209,14 @@ define void @reduce_fadd_double() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32 = call fast double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call fast double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fadd_double'
@@ -137,6 +228,14 @@ define void @reduce_fadd_double() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32 = call fast double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call fast double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef)
@@ -147,6 +246,15 @@ define void @reduce_fadd_double() {
   %v32 = call fast double @llvm.vector.reduce.fadd.v32f64(double 0.0, <32 x double> undef)
   %V64 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.0, <64 x double> undef)
   %V128 = call fast double @llvm.vector.reduce.fadd.v128f64(double 0.0, <128 x double> undef)
+
+  %V1_vp = call fast double @llvm.vp.reduce.fadd.v1f64(double 0.0, <1 x double> undef, <1 x i1> undef, i32 undef)
+  %V2_vp = call fast double @llvm.vp.reduce.fadd.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef)
+  %V4_vp = call fast double @llvm.vp.reduce.fadd.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef)
+  %V8_vp = call fast double @llvm.vp.reduce.fadd.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef)
+  %V16_vp = call fast double @llvm.vp.reduce.fadd.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef)
+  %v32_vp = call fast double @llvm.vp.reduce.fadd.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef)
+  %V64_vp = call fast double @llvm.vp.reduce.fadd.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call fast double @llvm.vp.reduce.fadd.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef)
   ret void
 }
 
@@ -160,6 +268,14 @@ define void @reduce_oredered_fadd_bfloat() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v32 = call bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64 = call bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V128 = call bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_oredered_fadd_bfloat'
@@ -171,6 +287,14 @@ define void @reduce_oredered_fadd_bfloat() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32 = call bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0.0, <1 x bfloat> undef)
@@ -181,6 +305,15 @@ define void @reduce_oredered_fadd_bfloat() {
   %v32 = call bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0.0, <32 x bfloat> undef)
   %V64 = call bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0.0, <64 x bfloat> undef)
   %V128 = call bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0.0, <128 x bfloat> undef)
+
+  %V1_vp = call bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0.0, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+  %V2_vp = call bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0.0, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+  %V4_vp = call bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0.0, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+  %V8_vp = call bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0.0, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+  %V16_vp = call bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0.0, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+  %v32_vp = call bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0.0, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+  %V64_vp = call bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0.0, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0.0, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
   ret void
 }
 
@@ -194,6 +327,14 @@ define void @reduce_oredered_fadd_half() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_oredered_fadd_half'
@@ -205,6 +346,14 @@ define void @reduce_oredered_fadd_half() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef)
@@ -215,6 +364,15 @@ define void @reduce_oredered_fadd_half() {
   %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0.0, <32 x half> undef)
   %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0.0, <64 x half> undef)
   %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0.0, <128 x half> undef)
+
+  %V1_vp = call half @llvm.vp.reduce.fadd.v1f16(half 0.0, <1 x half> undef, <1 x i1> undef, i32 undef)
+  %V2_vp = call half @llvm.vp.reduce.fadd.v2f16(half 0.0, <2 x half> undef, <2 x i1> undef, i32 undef)
+  %V4_vp = call half @llvm.vp.reduce.fadd.v4f16(half 0.0, <4 x half> undef, <4 x i1> undef, i32 undef)
+  %V8_vp = call half @llvm.vp.reduce.fadd.v8f16(half 0.0, <8 x half> undef, <8 x i1> undef, i32 undef)
+  %V16_vp = call half @llvm.vp.reduce.fadd.v16f16(half 0.0, <16 x half> undef, <16 x i1> undef, i32 undef)
+  %v32_vp = call half @llvm.vp.reduce.fadd.v32f16(half 0.0, <32 x half> undef, <32 x i1> undef, i32 undef)
+  %V64_vp = call half @llvm.vp.reduce.fadd.v64f16(half 0.0, <64 x half> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call half @llvm.vp.reduce.fadd.v128f16(half 0.0, <128 x half> undef, <128 x i1> undef, i32 undef)
   ret void
 }
 
@@ -228,6 +386,14 @@ define void @reduce_oredered_fadd_float() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v32 = call float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64 = call float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V128 = call float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_oredered_fadd_float'
@@ -239,6 +405,14 @@ define void @reduce_oredered_fadd_float() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32 = call float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef)
@@ -249,6 +423,15 @@ define void @reduce_oredered_fadd_float() {
   %v32 = call float @llvm.vector.reduce.fadd.v32f32(float 0.0, <32 x float> undef)
   %V64 = call float @llvm.vector.reduce.fadd.v64f32(float 0.0, <64 x float> undef)
   %V128 = call float @llvm.vector.reduce.fadd.v128f32(float 0.0, <128 x float> undef)
+
+  %V1_vp = call float @llvm.vp.reduce.fadd.v1f32(float 0.0, <1 x float> undef, <1 x i1> undef, i32 undef)
+  %V2_vp = call float @llvm.vp.reduce.fadd.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef)
+  %V4_vp = call float @llvm.vp.reduce.fadd.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef)
+  %V8_vp = call float @llvm.vp.reduce.fadd.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef)
+  %V16_vp = call float @llvm.vp.reduce.fadd.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef)
+  %v32_vp = call float @llvm.vp.reduce.fadd.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef)
+  %V64_vp = call float @llvm.vp.reduce.fadd.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call float @llvm.vp.reduce.fadd.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef)
   ret void
 }
 
@@ -262,6 +445,14 @@ define void @reduce_oredered_fadd_double() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v32 = call double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64 = call double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V128 = call double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %v32_vp = call double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %V64_vp = call double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %V128_vp = call double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_oredered_fadd_double'
@@ -273,6 +464,14 @@ define void @reduce_oredered_fadd_double() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32 = call double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %v32_vp = call double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %V64_vp = call double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %V128_vp = call double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef)
@@ -283,6 +482,15 @@ define void @reduce_oredered_fadd_double() {
   %v32 = call double @llvm.vector.reduce.fadd.v32f64(double 0.0, <32 x double> undef)
   %V64 = call double @llvm.vector.reduce.fadd.v64f64(double 0.0, <64 x double> undef)
   %V128 = call double @llvm.vector.reduce.fadd.v128f64(double 0.0, <128 x double> undef)
+
+  %V1_vp = call double @llvm.vp.reduce.fadd.v1f64(double 0.0, <1 x double> undef, <1 x i1> undef, i32 undef)
+  %V2_vp = call double @llvm.vp.reduce.fadd.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef)
+  %V4_vp = call double @llvm.vp.reduce.fadd.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef)
+  %V8_vp = call double @llvm.vp.reduce.fadd.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef)
+  %V16_vp = call double @llvm.vp.reduce.fadd.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef)
+  %v32_vp = call double @llvm.vp.reduce.fadd.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef)
+  %V64_vp = call double @llvm.vp.reduce.fadd.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call double @llvm.vp.reduce.fadd.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef)
   ret void
 }
 
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
index b14c60012077de..dedeb4be67ae82 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
@@ -1,8 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
 
 define float @reduce_fmaximum_f32(float %arg) {
 ; CHECK-LABEL: 'reduce_fmaximum_f32'
@@ -20,6 +22,34 @@ define float @reduce_fmaximum_f32(float %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %5 = call fast float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %6 = call fast float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %7 = call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %8 = call fast float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %9 = call fast float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %10 = call fast float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %11 = call fast float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %12 = call fast float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %13 = call fast float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %14 = call fast float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %15 = call float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %16 = call float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %17 = call float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %18 = call float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %19 = call float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %20 = call float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %21 = call float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %22 = call fast float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %23 = call fast float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %24 = call fast float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %25 = call fast float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %26 = call fast float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %27 = call fast float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %28 = call fast float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float undef
 ;
 ; SIZE-LABEL: 'reduce_fmaximum_f32'
@@ -37,6 +67,34 @@ define float @reduce_fmaximum_f32(float %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call fast float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = call fast float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %8 = call fast float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %9 = call fast float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %10 = call fast float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %11 = call fast float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %12 = call fast float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %13 = call fast float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %14 = call fast float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %15 = call float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %16 = call float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %17 = call float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %18 = call float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %19 = call float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %20 = call float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %21 = call float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %22 = call fast float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %23 = call fast float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %24 = call fast float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %25 = call fast float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %26 = call fast float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %27 = call fast float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %28 = call fast float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float undef
 ;
 %V2   = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
@@ -53,6 +111,36 @@ call fast float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
 call fast float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
 call fast float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
 call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+
+%V2_vp   = call float @llvm.vp.reduce.fmaximum.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef)
+%V4_vp   = call float @llvm.vp.reduce.fmaximum.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef)
+%V8_vp   = call float @llvm.vp.reduce.fmaximum.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef)
+%V16_vp   = call float @llvm.vp.reduce.fmaximum.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef)
+%V32_vp   = call float @llvm.vp.reduce.fmaximum.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef)
+%V64_vp   = call float @llvm.vp.reduce.fmaximum.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef)
+%V128_vp   = call float @llvm.vp.reduce.fmaximum.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmaximum.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmaximum.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmaximum.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmaximum.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmaximum.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmaximum.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmaximum.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef)
+
+call float @llvm.vp.reduce.fmax.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmax.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmax.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmax.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmax.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmax.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmax.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmax.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmax.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmax.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmax.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmax.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmax.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmax.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef)
 ret float undef
 }
 declare float @llvm.vector.reduce.fmaximum.v2f32(<2 x float>)
@@ -77,6 +165,34 @@ define double @reduce_fmaximum_f64(double %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %4 = call fast double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %5 = call fast double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %6 = call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %7 = call double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %8 = call double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %9 = call double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %10 = call double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %11 = call double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %12 = call double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %13 = call double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %14 = call fast double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %15 = call fast double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %16 = call fast double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %17 = call fast double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %18 = call fast double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %19 = call fast double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %20 = call fast double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %21 = call double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %22 = call double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %23 = call double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %24 = call double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %25 = call double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %26 = call double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %27 = call double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %28 = call fast double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %29 = call fast double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %30 = call fast double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %31 = call fast double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %32 = call fast double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %33 = call fast double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %34 = call fast double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double undef
 ;
 ; SIZE-LABEL: 'reduce_fmaximum_f64'
@@ -92,6 +208,34 @@ define double @reduce_fmaximum_f64(double %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = call fast double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call fast double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %7 = call double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %8 = call double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %9 = call double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %10 = call double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %11 = call double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %12 = call double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %13 = call double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %14 = call fast double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %15 = call fast double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %16 = call fast double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %17 = call fast double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %18 = call fast double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %19 = call fast double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %20 = call fast double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %21 = call double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %22 = call double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %23 = call double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %24 = call double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %25 = call double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %26 = call double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %27 = call double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %28 = call fast double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %29 = call fast double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %30 = call fast double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %31 = call fast double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %32 = call fast double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %33 = call fast double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %34 = call fast double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double undef
 ;
 %V2   = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
@@ -106,6 +250,36 @@ call fast double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
 call fast double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
 call fast double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
 call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+
+call double @llvm.vp.reduce.fmaximum.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmaximum.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmaximum.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmaximum.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmaximum.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmaximum.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmaximum.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmaximum.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmaximum.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmaximum.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmaximum.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmaximum.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmaximum.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmaximum.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef)
+
+call double @llvm.vp.reduce.fmax.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmax.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmax.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmax.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmax.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmax.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmax.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmax.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmax.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmax.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmax.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmax.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmax.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmax.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef)
 ret double undef
 }
 declare double @llvm.vector.reduce.fmaximum.v2f64(<2 x double>)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
index 2172a85bc46aaf..6d51911b4fc408 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
@@ -1,8 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
 
 define float @reduce_fmaximum_f32(float %arg) {
 ; CHECK-LABEL: 'reduce_fmaximum_f32'
@@ -13,6 +15,20 @@ define float @reduce_fmaximum_f32(float %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fminimum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fminimum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fminimum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fminimum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call float @llvm.vp.reduce.fminimum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call float @llvm.vp.reduce.fminimum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call float @llvm.vp.reduce.fminimum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = call float @llvm.vp.reduce.fmin.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %2 = call float @llvm.vp.reduce.fmin.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %3 = call float @llvm.vp.reduce.fmin.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %4 = call float @llvm.vp.reduce.fmin.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %5 = call float @llvm.vp.reduce.fmin.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %6 = call float @llvm.vp.reduce.fmin.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %7 = call float @llvm.vp.reduce.fmin.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float undef
 ;
 ; SIZE-LABEL: 'reduce_fmaximum_f32'
@@ -23,6 +39,20 @@ define float @reduce_fmaximum_f32(float %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fminimum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fminimum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fminimum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fminimum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call float @llvm.vp.reduce.fminimum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call float @llvm.vp.reduce.fminimum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call float @llvm.vp.reduce.fminimum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = call float @llvm.vp.reduce.fmin.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %2 = call float @llvm.vp.reduce.fmin.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %3 = call float @llvm.vp.reduce.fmin.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %4 = call float @llvm.vp.reduce.fmin.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %5 = call float @llvm.vp.reduce.fmin.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %6 = call float @llvm.vp.reduce.fmin.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %7 = call float @llvm.vp.reduce.fmin.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float undef
 ;
 %V2   = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
@@ -32,6 +62,22 @@ define float @reduce_fmaximum_f32(float %arg) {
 %V32   = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
 %V64   = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
 %V128   = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
+
+%V2_vp   = call float @llvm.vp.reduce.fminimum.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef)
+%V4_vp   = call float @llvm.vp.reduce.fminimum.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef)
+%V8_vp   = call float @llvm.vp.reduce.fminimum.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef)
+%V16_vp   = call float @llvm.vp.reduce.fminimum.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef)
+%V32_vp   = call float @llvm.vp.reduce.fminimum.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef)
+%V64_vp   = call float @llvm.vp.reduce.fminimum.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef)
+%V128_vp   = call float @llvm.vp.reduce.fminimum.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef)
+
+call float @llvm.vp.reduce.fmin.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmin.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmin.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmin.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmin.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmin.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmin.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef)
 ret float undef
 }
 declare float @llvm.vector.reduce.fminimum.v2f32(<2 x float>)
@@ -50,6 +96,20 @@ define double @reduce_fmaximum_f64(double %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fminimum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fminimum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fminimum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fminimum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %V32_vp = call double @llvm.vp.reduce.fminimum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %V64_vp = call double @llvm.vp.reduce.fminimum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %V128_vp = call double @llvm.vp.reduce.fminimum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = call double @llvm.vp.reduce.fmin.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %2 = call double @llvm.vp.reduce.fmin.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %3 = call double @llvm.vp.reduce.fmin.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %4 = call double @llvm.vp.reduce.fmin.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %5 = call double @llvm.vp.reduce.fmin.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %6 = call double @llvm.vp.reduce.fmin.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %7 = call double @llvm.vp.reduce.fmin.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double undef
 ;
 ; SIZE-LABEL: 'reduce_fmaximum_f64'
@@ -59,6 +119,20 @@ define double @reduce_fmaximum_f64(double %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fminimum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fminimum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fminimum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fminimum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %V32_vp = call double @llvm.vp.reduce.fminimum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %V64_vp = call double @llvm.vp.reduce.fminimum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %V128_vp = call double @llvm.vp.reduce.fminimum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = call double @llvm.vp.reduce.fmin.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %2 = call double @llvm.vp.reduce.fmin.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %3 = call double @llvm.vp.reduce.fmin.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %4 = call double @llvm.vp.reduce.fmin.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %5 = call double @llvm.vp.reduce.fmin.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %6 = call double @llvm.vp.reduce.fmin.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %7 = call double @llvm.vp.reduce.fmin.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double undef
 ;
 %V2   = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
@@ -67,6 +141,22 @@ define double @reduce_fmaximum_f64(double %arg) {
 %V16   = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
 %V32   = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
 %V64   = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
+
+%V2_vp   = call double @llvm.vp.reduce.fminimum.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef)
+%V4_vp   = call double @llvm.vp.reduce.fminimum.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef)
+%V8_vp   = call double @llvm.vp.reduce.fminimum.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef)
+%V16_vp   = call double @llvm.vp.reduce.fminimum.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef)
+%V32_vp   = call double @llvm.vp.reduce.fminimum.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef)
+%V64_vp   = call double @llvm.vp.reduce.fminimum.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef)
+%V128_vp   = call double @llvm.vp.reduce.fminimum.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef)
+
+call double @llvm.vp.reduce.fmin.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmin.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmin.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmin.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmin.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmin.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmin.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef)
 ret double undef
 }
 declare double @llvm.vector.reduce.fminimum.v2f64(<2 x double>)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll
index 162562c7b89310..a8439b14b28e9f 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFH
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFHMIN
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s  --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFH
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFHMIN
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s  --check-prefix=SIZE
 
 define void @reduce_fmul_bfloat() {
 ; FP-REDUCE-LABEL: 'reduce_fmul_bfloat'
@@ -13,6 +13,14 @@ define void @reduce_fmul_bfloat() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 211 for instruction: %v32 = call fast bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 541 for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 573 for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fmul_bfloat'
@@ -24,6 +32,14 @@ define void @reduce_fmul_bfloat() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %v32 = call fast bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast bfloat @llvm.vector.reduce.fmul.v1bf16(bfloat 0.0, <1 x bfloat> undef)
@@ -34,6 +50,15 @@ define void @reduce_fmul_bfloat() {
   %v32 = call fast bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0.0, <32 x bfloat> undef)
   %V64 = call fast bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0.0, <64 x bfloat> undef)
   %V128 = call fast bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0.0, <128 x bfloat> undef)
+
+  %V1_vp = call fast bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0.0, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+  %V2_vp = call fast bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0.0, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+  %V4_vp = call fast bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0.0, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+  %V8_vp = call fast bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0.0, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+  %V16_vp = call fast bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0.0, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+  %v32_vp = call fast bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0.0, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+  %V64_vp = call fast bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0.0, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call fast bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0.0, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
   ret void
 }
 
@@ -47,6 +72,14 @@ define void @reduce_fmul_half() {
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 151 for instruction: %v32 = call fast half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef)
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 541 for instruction: %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 573 for instruction: %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; FP-REDUCE-ZVFHMIN-LABEL: 'reduce_fmul_half'
@@ -58,6 +91,14 @@ define void @reduce_fmul_half() {
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 211 for instruction: %v32 = call fast half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef)
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 541 for instruction: %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 573 for instruction: %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fmul_half'
@@ -69,6 +110,14 @@ define void @reduce_fmul_half() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %v32 = call fast half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast half @llvm.vector.reduce.fmul.v1f16(half 0.0, <1 x half> undef)
@@ -79,6 +128,15 @@ define void @reduce_fmul_half() {
   %v32 = call fast half @llvm.vector.reduce.fmul.v32f16(half 0.0, <32 x half> undef)
   %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0.0, <64 x half> undef)
   %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0.0, <128 x half> undef)
+
+  %V1_vp = call fast half @llvm.vp.reduce.fmul.v1f16(half 0.0, <1 x half> undef, <1 x i1> undef, i32 undef)
+  %V2_vp = call fast half @llvm.vp.reduce.fmul.v2f16(half 0.0, <2 x half> undef, <2 x i1> undef, i32 undef)
+  %V4_vp = call fast half @llvm.vp.reduce.fmul.v4f16(half 0.0, <4 x half> undef, <4 x i1> undef, i32 undef)
+  %V8_vp = call fast half @llvm.vp.reduce.fmul.v8f16(half 0.0, <8 x half> undef, <8 x i1> undef, i32 undef)
+  %V16_vp = call fast half @llvm.vp.reduce.fmul.v16f16(half 0.0, <16 x half> undef, <16 x i1> undef, i32 undef)
+  %v32_vp = call fast half @llvm.vp.reduce.fmul.v32f16(half 0.0, <32 x half> undef, <32 x i1> undef, i32 undef)
+  %V64_vp = call fast half @llvm.vp.reduce.fmul.v64f16(half 0.0, <64 x half> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call fast half @llvm.vp.reduce.fmul.v128f16(half 0.0, <128 x half> undef, <128 x i1> undef, i32 undef)
   ret void
 }
 
@@ -92,6 +150,14 @@ define void @reduce_fmul_float() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 451 for instruction: %v32 = call fast float @llvm.vector.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 483 for instruction: %V64 = call fast float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 547 for instruction: %V128 = call fast float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast float @llvm.vp.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fmul_float'
@@ -103,6 +169,14 @@ define void @reduce_fmul_float() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %v32 = call fast float @llvm.vector.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64 = call fast float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V128 = call fast float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast float @llvm.vp.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast float @llvm.vector.reduce.fmul.v1f32(float 0.0, <1 x float> undef)
@@ -113,6 +187,15 @@ define void @reduce_fmul_float() {
   %v32 = call fast float @llvm.vector.reduce.fmul.v32f32(float 0.0, <32 x float> undef)
   %V64 = call fast float @llvm.vector.reduce.fmul.v64f32(float 0.0, <64 x float> undef)
   %V128 = call fast float @llvm.vector.reduce.fmul.v128f32(float 0.0, <128 x float> undef)
+
+  %V1_vp = call fast float @llvm.vp.reduce.fmul.v1f32(float 0.0, <1 x float> undef, <1 x i1> undef, i32 undef)
+  %V2_vp = call fast float @llvm.vp.reduce.fmul.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef)
+  %V4_vp = call fast float @llvm.vp.reduce.fmul.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef)
+  %V8_vp = call fast float @llvm.vp.reduce.fmul.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef)
+  %V16_vp = call fast float @llvm.vp.reduce.fmul.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef)
+  %v32_vp = call fast float @llvm.vp.reduce.fmul.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef)
+  %V64_vp = call fast float @llvm.vp.reduce.fmul.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call fast float @llvm.vp.reduce.fmul.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef)
   ret void
 }
 
@@ -126,6 +209,14 @@ define void @reduce_fmul_double() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 393 for instruction: %v32 = call fast double @llvm.vector.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 457 for instruction: %V64 = call fast double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 585 for instruction: %V128 = call fast double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast double @llvm.vp.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fmul_double'
@@ -137,6 +228,14 @@ define void @reduce_fmul_double() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v32 = call fast double @llvm.vector.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64 = call fast double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V128 = call fast double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast double @llvm.vp.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast double @llvm.vector.reduce.fmul.v1f64(double 0.0, <1 x double> undef)
@@ -147,6 +246,15 @@ define void @reduce_fmul_double() {
   %v32 = call fast double @llvm.vector.reduce.fmul.v32f64(double 0.0, <32 x double> undef)
   %V64 = call fast double @llvm.vector.reduce.fmul.v64f64(double 0.0, <64 x double> undef)
   %V128 = call fast double @llvm.vector.reduce.fmul.v128f64(double 0.0, <128 x double> undef)
+
+  %V1_vp = call fast double @llvm.vp.reduce.fmul.v1f64(double 0.0, <1 x double> undef, <1 x i1> undef, i32 undef)
+  %V2_vp = call fast double @llvm.vp.reduce.fmul.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef)
+  %V4_vp = call fast double @llvm.vp.reduce.fmul.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef)
+  %V8_vp = call fast double @llvm.vp.reduce.fmul.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef)
+  %V16_vp = call fast double @llvm.vp.reduce.fmul.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef)
+  %v32_vp = call fast double @llvm.vp.reduce.fmul.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef)
+  %V64_vp = call fast double @llvm.vp.reduce.fmul.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call fast double @llvm.vp.reduce.fmul.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef)
   ret void
 }
 
@@ -160,6 +268,14 @@ define void @reduce_ordered_fmul_bfloat() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32 = call bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %V64 = call bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 510 for instruction: %V128 = call bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_ordered_fmul_bfloat'
@@ -171,6 +287,14 @@ define void @reduce_ordered_fmul_bfloat() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %v32 = call bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 191 for instruction: %V64 = call bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 382 for instruction: %V128 = call bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call bfloat @llvm.vector.reduce.fmul.v1bf16(bfloat 0.0, <1 x bfloat> undef)
@@ -181,6 +305,15 @@ define void @reduce_ordered_fmul_bfloat() {
   %v32 = call bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0.0, <32 x bfloat> undef)
   %V64 = call bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0.0, <64 x bfloat> undef)
   %V128 = call bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0.0, <128 x bfloat> undef)
+
+  %V1_vp = call bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0.0, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+  %V2_vp = call bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0.0, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+  %V4_vp = call bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0.0, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+  %V8_vp = call bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0.0, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+  %V16_vp = call bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0.0, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+  %v32_vp = call bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0.0, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+  %V64_vp = call bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0.0, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0.0, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
   ret void
 }
 
@@ -194,6 +327,14 @@ define void @reduce_ordered_fmul_half() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32 = call half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %V64 = call half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 510 for instruction: %V128 = call half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_ordered_fmul_half'
@@ -205,6 +346,14 @@ define void @reduce_ordered_fmul_half() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %v32 = call half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 191 for instruction: %V64 = call half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 382 for instruction: %V128 = call half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call half @llvm.vector.reduce.fmul.v1f16(half 0.0, <1 x half> undef)
@@ -215,6 +364,15 @@ define void @reduce_ordered_fmul_half() {
   %v32 = call half @llvm.vector.reduce.fmul.v32f16(half 0.0, <32 x half> undef)
   %V64 = call half @llvm.vector.reduce.fmul.v64f16(half 0.0, <64 x half> undef)
   %V128 = call half @llvm.vector.reduce.fmul.v128f16(half 0.0, <128 x half> undef)
+
+  %V1_vp = call half @llvm.vp.reduce.fmul.v1f16(half 0.0, <1 x half> undef, <1 x i1> undef, i32 undef)
+  %V2_vp = call half @llvm.vp.reduce.fmul.v2f16(half 0.0, <2 x half> undef, <2 x i1> undef, i32 undef)
+  %V4_vp = call half @llvm.vp.reduce.fmul.v4f16(half 0.0, <4 x half> undef, <4 x i1> undef, i32 undef)
+  %V8_vp = call half @llvm.vp.reduce.fmul.v8f16(half 0.0, <8 x half> undef, <8 x i1> undef, i32 undef)
+  %V16_vp = call half @llvm.vp.reduce.fmul.v16f16(half 0.0, <16 x half> undef, <16 x i1> undef, i32 undef)
+  %v32_vp = call half @llvm.vp.reduce.fmul.v32f16(half 0.0, <32 x half> undef, <32 x i1> undef, i32 undef)
+  %V64_vp = call half @llvm.vp.reduce.fmul.v64f16(half 0.0, <64 x half> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call half @llvm.vp.reduce.fmul.v128f16(half 0.0, <128 x half> undef, <128 x i1> undef, i32 undef)
   ret void
 }
 
@@ -228,6 +386,14 @@ define void @reduce_ordered_fmul_float() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32 = call float @llvm.vector.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 254 for instruction: %V64 = call float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 508 for instruction: %V128 = call float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call float @llvm.vp.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_ordered_fmul_float'
@@ -239,6 +405,14 @@ define void @reduce_ordered_fmul_float() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %v32 = call float @llvm.vector.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 190 for instruction: %V64 = call float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 380 for instruction: %V128 = call float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call float @llvm.vp.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call float @llvm.vector.reduce.fmul.v1f32(float 0.0, <1 x float> undef)
@@ -249,6 +423,15 @@ define void @reduce_ordered_fmul_float() {
   %v32 = call float @llvm.vector.reduce.fmul.v32f32(float 0.0, <32 x float> undef)
   %V64 = call float @llvm.vector.reduce.fmul.v64f32(float 0.0, <64 x float> undef)
   %V128 = call float @llvm.vector.reduce.fmul.v128f32(float 0.0, <128 x float> undef)
+
+  %V1_vp = call float @llvm.vp.reduce.fmul.v1f32(float 0.0, <1 x float> undef, <1 x i1> undef, i32 undef)
+  %V2_vp = call float @llvm.vp.reduce.fmul.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef)
+  %V4_vp = call float @llvm.vp.reduce.fmul.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef)
+  %V8_vp = call float @llvm.vp.reduce.fmul.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef)
+  %V16_vp = call float @llvm.vp.reduce.fmul.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef)
+  %v32_vp = call float @llvm.vp.reduce.fmul.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef)
+  %V64_vp = call float @llvm.vp.reduce.fmul.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call float @llvm.vp.reduce.fmul.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef)
   ret void
 }
 
@@ -262,6 +445,14 @@ define void @reduce_ordered_fmul_double() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 126 for instruction: %v32 = call double @llvm.vector.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 252 for instruction: %V64 = call double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 504 for instruction: %V128 = call double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call double @llvm.vp.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %v32_vp = call double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %V64_vp = call double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %V128_vp = call double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_ordered_fmul_double'
@@ -273,6 +464,14 @@ define void @reduce_ordered_fmul_double() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %v32 = call double @llvm.vector.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 188 for instruction: %V64 = call double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 376 for instruction: %V128 = call double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call double @llvm.vp.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %v32_vp = call double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %V64_vp = call double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %V128_vp = call double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call double @llvm.vector.reduce.fmul.v1f64(double 0.0, <1 x double> undef)
@@ -283,5 +482,14 @@ define void @reduce_ordered_fmul_double() {
   %v32 = call double @llvm.vector.reduce.fmul.v32f64(double 0.0, <32 x double> undef)
   %V64 = call double @llvm.vector.reduce.fmul.v64f64(double 0.0, <64 x double> undef)
   %V128 = call double @llvm.vector.reduce.fmul.v128f64(double 0.0, <128 x double> undef)
+
+  %V1_vp = call double @llvm.vp.reduce.fmul.v1f64(double 0.0, <1 x double> undef, <1 x i1> undef, i32 undef)
+  %V2_vp = call double @llvm.vp.reduce.fmul.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef)
+  %V4_vp = call double @llvm.vp.reduce.fmul.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef)
+  %V8_vp = call double @llvm.vp.reduce.fmul.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef)
+  %V16_vp = call double @llvm.vp.reduce.fmul.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef)
+  %v32_vp = call double @llvm.vp.reduce.fmul.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef)
+  %V64_vp = call double @llvm.vp.reduce.fmul.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call double @llvm.vp.reduce.fmul.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef)
   ret void
 }
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
index c5d677e574c13c..c037eb5bfc17f4 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
@@ -1,8 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
 
 define i32 @reduce_umin_i1(i32 %arg) {
 ; CHECK-LABEL: 'reduce_umin_i1'
@@ -14,6 +16,14 @@ define i32 @reduce_umin_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.umax.v32i1(<32 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.umax.v64i1(<64 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.umax.v128i1(<128 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umin_i1'
@@ -25,6 +35,14 @@ define i32 @reduce_umin_i1(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.umax.v32i1(<32 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.umax.v64i1(<64 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.umax.v128i1(<128 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.umax.v1i1(<1 x i1> undef)
@@ -35,6 +53,15 @@ define i32 @reduce_umin_i1(i32 %arg) {
   %V32  = call i1 @llvm.vector.reduce.umax.v32i1(<32 x i1> undef)
   %V64  = call i1 @llvm.vector.reduce.umax.v64i1(<64 x i1> undef)
   %V128 = call i1 @llvm.vector.reduce.umax.v128i1(<128 x i1> undef)
+
+  %V1_vp   = call i1 @llvm.vp.reduce.umax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i1 @llvm.vp.reduce.umax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i1 @llvm.vp.reduce.umax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i1 @llvm.vp.reduce.umax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i1 @llvm.vp.reduce.umax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i1 @llvm.vp.reduce.umax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i1 @llvm.vp.reduce.umax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i1 @llvm.vp.reduce.umax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -48,6 +75,14 @@ define i32 @reduce_umax_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umax_i8'
@@ -59,6 +94,14 @@ define i32 @reduce_umax_i8(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> undef)
@@ -69,6 +112,15 @@ define i32 @reduce_umax_i8(i32 %arg) {
   %V32  = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
   %V64  = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
   %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
+
+  %V1_vp   = call i8 @llvm.vp.reduce.umax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i8 @llvm.vp.reduce.umax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i8 @llvm.vp.reduce.umax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i8 @llvm.vp.reduce.umax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i8 @llvm.vp.reduce.umax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i8 @llvm.vp.reduce.umax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i8 @llvm.vp.reduce.umax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i8 @llvm.vp.reduce.umax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -81,7 +133,14 @@ define i32 @reduce_umax_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umax_i16'
@@ -92,7 +151,14 @@ define i32 @reduce_umax_i16(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.umax.v1i16(<1 x i16> undef)
@@ -102,7 +168,15 @@ define i32 @reduce_umax_i16(i32 %arg) {
   %V16  = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
   %V32  = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
   %V64  = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
-  %V128 = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> undef)
+
+  %V1_vp   = call i16 @llvm.vp.reduce.umax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i16 @llvm.vp.reduce.umax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i16 @llvm.vp.reduce.umax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i16 @llvm.vp.reduce.umax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i16 @llvm.vp.reduce.umax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i16 @llvm.vp.reduce.umax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i16 @llvm.vp.reduce.umax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i16 @llvm.vp.reduce.umax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -116,6 +190,14 @@ define i32 @reduce_umax_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.umax.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.umax.v128i32(<128 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umax_i32'
@@ -127,6 +209,14 @@ define i32 @reduce_umax_i32(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.umax.v64i32(<64 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.umax.v128i32(<128 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.umax.v1i32(<1 x i32> undef)
@@ -137,32 +227,19 @@ define i32 @reduce_umax_i32(i32 %arg) {
   %V32  = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
   %V64  = call i32 @llvm.vector.reduce.umax.v64i32(<64 x i32> undef)
   %V128 = call i32 @llvm.vector.reduce.umax.v128i32(<128 x i32> undef)
+
+  %V1_vp   = call i32 @llvm.vp.reduce.umax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i32 @llvm.vp.reduce.umax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i32 @llvm.vp.reduce.umax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i32 @llvm.vp.reduce.umax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i32 @llvm.vp.reduce.umax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i32 @llvm.vp.reduce.umax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i32 @llvm.vp.reduce.umax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i32 @llvm.vp.reduce.umax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
 define i32 @reduce_umax_i64(i32 %arg) {
-; CHECK-LABEL: 'reduce_umax_i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.umax.v32i64(<32 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.umax.v64i64(<64 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.umax.v128i64(<128 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SIZE-LABEL: 'reduce_umax_i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.umax.v32i64(<32 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.umax.v64i64(<64 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.umax.v128i64(<128 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
   %V1   = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
   %V2   = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
   %V4   = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
@@ -171,6 +248,15 @@ define i32 @reduce_umax_i64(i32 %arg) {
   %V32  = call i64 @llvm.vector.reduce.umax.v32i64(<32 x i64> undef)
   %V64  = call i64 @llvm.vector.reduce.umax.v64i64(<64 x i64> undef)
   %V128 = call i64 @llvm.vector.reduce.umax.v128i64(<128 x i64> undef)
+
+  %V1_vp   = call i64 @llvm.vp.reduce.umax.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i64 @llvm.vp.reduce.umax.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i64 @llvm.vp.reduce.umax.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i64 @llvm.vp.reduce.umax.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i64 @llvm.vp.reduce.umax.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i64 @llvm.vp.reduce.umax.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i64 @llvm.vp.reduce.umax.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i64 @llvm.vp.reduce.umax.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -184,6 +270,14 @@ define i32 @reduce_smin_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.smax.v32i1(<32 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.smax.v64i1(<64 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.smax.v128i1(<128 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smin_i1'
@@ -195,6 +289,14 @@ define i32 @reduce_smin_i1(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.smax.v32i1(<32 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.smax.v64i1(<64 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.smax.v128i1(<128 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> undef)
@@ -205,6 +307,15 @@ define i32 @reduce_smin_i1(i32 %arg) {
   %V32  = call i1 @llvm.vector.reduce.smax.v32i1(<32 x i1> undef)
   %V64  = call i1 @llvm.vector.reduce.smax.v64i1(<64 x i1> undef)
   %V128 = call i1 @llvm.vector.reduce.smax.v128i1(<128 x i1> undef)
+
+  %V1_vp   = call i1 @llvm.vp.reduce.smax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i1 @llvm.vp.reduce.smax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i1 @llvm.vp.reduce.smax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i1 @llvm.vp.reduce.smax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i1 @llvm.vp.reduce.smax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i1 @llvm.vp.reduce.smax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i1 @llvm.vp.reduce.smax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i1 @llvm.vp.reduce.smax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -218,6 +329,14 @@ define i32 @reduce_smax_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smax_i8'
@@ -229,6 +348,14 @@ define i32 @reduce_smax_i8(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.smax.v1i8(<1 x i8> undef)
@@ -239,6 +366,15 @@ define i32 @reduce_smax_i8(i32 %arg) {
   %V32  = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
   %V64  = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
   %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
+
+  %V1_vp   = call i8 @llvm.vp.reduce.smax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i8 @llvm.vp.reduce.smax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i8 @llvm.vp.reduce.smax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i8 @llvm.vp.reduce.smax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i8 @llvm.vp.reduce.smax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i8 @llvm.vp.reduce.smax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i8 @llvm.vp.reduce.smax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i8 @llvm.vp.reduce.smax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -252,6 +388,14 @@ define i32 @reduce_smax_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smax_i16'
@@ -263,6 +407,14 @@ define i32 @reduce_smax_i16(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.smax.v1i16(<1 x i16> undef)
@@ -273,6 +425,15 @@ define i32 @reduce_smax_i16(i32 %arg) {
   %V32  = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
   %V64  = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
   %V128 = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> undef)
+
+  %V1_vp   = call i16 @llvm.vp.reduce.smax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i16 @llvm.vp.reduce.smax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i16 @llvm.vp.reduce.smax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i16 @llvm.vp.reduce.smax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i16 @llvm.vp.reduce.smax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i16 @llvm.vp.reduce.smax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i16 @llvm.vp.reduce.smax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i16 @llvm.vp.reduce.smax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -286,6 +447,14 @@ define i32 @reduce_smax_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smax_i32'
@@ -297,6 +466,14 @@ define i32 @reduce_smax_i32(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.smax.v1i32(<1 x i32> undef)
@@ -307,32 +484,19 @@ define i32 @reduce_smax_i32(i32 %arg) {
   %V32  = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
   %V64  = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef)
   %V128 = call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef)
+
+  %V1_vp   = call i32 @llvm.vp.reduce.smax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i32 @llvm.vp.reduce.smax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i32 @llvm.vp.reduce.smax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i32 @llvm.vp.reduce.smax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i32 @llvm.vp.reduce.smax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i32 @llvm.vp.reduce.smax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i32 @llvm.vp.reduce.smax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i32 @llvm.vp.reduce.smax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
 define i32 @reduce_smax_i64(i32 %arg) {
-; CHECK-LABEL: 'reduce_smax_i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.smax.v128i64(<128 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SIZE-LABEL: 'reduce_smax_i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.smax.v128i64(<128 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
   %V1   = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
   %V2   = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
   %V4   = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
@@ -341,6 +505,15 @@ define i32 @reduce_smax_i64(i32 %arg) {
   %V32  = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> undef)
   %V64  = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef)
   %V128 = call i64 @llvm.vector.reduce.smax.v128i64(<128 x i64> undef)
+
+  %V1_vp   = call i64 @llvm.vp.reduce.smax.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i64 @llvm.vp.reduce.smax.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i64 @llvm.vp.reduce.smax.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i64 @llvm.vp.reduce.smax.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i64 @llvm.vp.reduce.smax.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i64 @llvm.vp.reduce.smax.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i64 @llvm.vp.reduce.smax.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i64 @llvm.vp.reduce.smax.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
index 89bff381384156..858c053f0c3ef3 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
@@ -1,8 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
 
 define i32 @reduce_umin_i1(i32 %arg) {
 ; CHECK-LABEL: 'reduce_umin_i1'
@@ -14,6 +16,14 @@ define i32 @reduce_umin_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.umin.v32i1(<32 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.umin.v64i1(<64 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.umin.v128i1(<128 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umin_i1'
@@ -25,6 +35,14 @@ define i32 @reduce_umin_i1(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.umin.v32i1(<32 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.umin.v64i1(<64 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.umin.v128i1(<128 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.umin.v1i1(<1 x i1> undef)
@@ -35,6 +53,15 @@ define i32 @reduce_umin_i1(i32 %arg) {
   %V32  = call i1 @llvm.vector.reduce.umin.v32i1(<32 x i1> undef)
   %V64  = call i1 @llvm.vector.reduce.umin.v64i1(<64 x i1> undef)
   %V128 = call i1 @llvm.vector.reduce.umin.v128i1(<128 x i1> undef)
+
+  %V1_vp   = call i1 @llvm.vp.reduce.umin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i1 @llvm.vp.reduce.umin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i1 @llvm.vp.reduce.umin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i1 @llvm.vp.reduce.umin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i1 @llvm.vp.reduce.umin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i1 @llvm.vp.reduce.umin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i1 @llvm.vp.reduce.umin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i1 @llvm.vp.reduce.umin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -48,6 +75,14 @@ define i32 @reduce_umin_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umin_i8'
@@ -59,6 +94,14 @@ define i32 @reduce_umin_i8(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.umin.v1i8(<1 x i8> undef)
@@ -69,6 +112,15 @@ define i32 @reduce_umin_i8(i32 %arg) {
   %V32  = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
   %V64  = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
   %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
+
+  %V1_vp   = call i8 @llvm.vp.reduce.umin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i8 @llvm.vp.reduce.umin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i8 @llvm.vp.reduce.umin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i8 @llvm.vp.reduce.umin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i8 @llvm.vp.reduce.umin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i8 @llvm.vp.reduce.umin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i8 @llvm.vp.reduce.umin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i8 @llvm.vp.reduce.umin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -82,6 +134,14 @@ define i32 @reduce_umin_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.umin.v128i16(<128 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umin_i16'
@@ -93,6 +153,14 @@ define i32 @reduce_umin_i16(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.umin.v128i16(<128 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.umin.v1i16(<1 x i16> undef)
@@ -103,6 +171,15 @@ define i32 @reduce_umin_i16(i32 %arg) {
   %V32  = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
   %V64  = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
   %V128 = call i16 @llvm.vector.reduce.umin.v128i16(<128 x i16> undef)
+
+  %V1_vp   = call i16 @llvm.vp.reduce.umin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i16 @llvm.vp.reduce.umin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i16 @llvm.vp.reduce.umin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i16 @llvm.vp.reduce.umin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i16 @llvm.vp.reduce.umin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i16 @llvm.vp.reduce.umin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i16 @llvm.vp.reduce.umin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i16 @llvm.vp.reduce.umin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -116,6 +193,14 @@ define i32 @reduce_umin_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.umin.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.umin.v128i32(<128 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umin_i32'
@@ -127,6 +212,14 @@ define i32 @reduce_umin_i32(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.umin.v64i32(<64 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.umin.v128i32(<128 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.umin.v1i32(<1 x i32> undef)
@@ -137,32 +230,19 @@ define i32 @reduce_umin_i32(i32 %arg) {
   %V32  = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
   %V64  = call i32 @llvm.vector.reduce.umin.v64i32(<64 x i32> undef)
   %V128 = call i32 @llvm.vector.reduce.umin.v128i32(<128 x i32> undef)
+
+  %V1_vp   = call i32 @llvm.vp.reduce.umin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i32 @llvm.vp.reduce.umin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i32 @llvm.vp.reduce.umin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i32 @llvm.vp.reduce.umin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i32 @llvm.vp.reduce.umin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i32 @llvm.vp.reduce.umin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i32 @llvm.vp.reduce.umin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i32 @llvm.vp.reduce.umin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
 define i32 @reduce_umin_i64(i32 %arg) {
-; CHECK-LABEL: 'reduce_umin_i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.umin.v32i64(<32 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.umin.v64i64(<64 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.umin.v128i64(<128 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SIZE-LABEL: 'reduce_umin_i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.umin.v32i64(<32 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.umin.v64i64(<64 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.umin.v128i64(<128 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
   %V1   = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
   %V2   = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
   %V4   = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
@@ -171,6 +251,15 @@ define i32 @reduce_umin_i64(i32 %arg) {
   %V32  = call i64 @llvm.vector.reduce.umin.v32i64(<32 x i64> undef)
   %V64  = call i64 @llvm.vector.reduce.umin.v64i64(<64 x i64> undef)
   %V128 = call i64 @llvm.vector.reduce.umin.v128i64(<128 x i64> undef)
+
+  %V1_vp   = call i64 @llvm.vp.reduce.umin.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i64 @llvm.vp.reduce.umin.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i64 @llvm.vp.reduce.umin.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i64 @llvm.vp.reduce.umin.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i64 @llvm.vp.reduce.umin.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i64 @llvm.vp.reduce.umin.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i64 @llvm.vp.reduce.umin.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i64 @llvm.vp.reduce.umin.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -184,6 +273,14 @@ define i32 @reduce_smin_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.smin.v32i1(<32 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.smin.v64i1(<64 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.smin.v128i1(<128 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smin_i1'
@@ -195,6 +292,14 @@ define i32 @reduce_smin_i1(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.smin.v32i1(<32 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.smin.v64i1(<64 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.smin.v128i1(<128 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.smin.v1i1(<1 x i1> undef)
@@ -205,6 +310,15 @@ define i32 @reduce_smin_i1(i32 %arg) {
   %V32  = call i1 @llvm.vector.reduce.smin.v32i1(<32 x i1> undef)
   %V64  = call i1 @llvm.vector.reduce.smin.v64i1(<64 x i1> undef)
   %V128 = call i1 @llvm.vector.reduce.smin.v128i1(<128 x i1> undef)
+
+  %V1_vp   = call i1 @llvm.vp.reduce.smin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i1 @llvm.vp.reduce.smin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i1 @llvm.vp.reduce.smin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i1 @llvm.vp.reduce.smin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i1 @llvm.vp.reduce.smin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i1 @llvm.vp.reduce.smin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i1 @llvm.vp.reduce.smin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i1 @llvm.vp.reduce.smin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -218,6 +332,14 @@ define i32 @reduce_smin_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smin_i8'
@@ -229,6 +351,14 @@ define i32 @reduce_smin_i8(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.smin.v1i8(<1 x i8> undef)
@@ -239,6 +369,15 @@ define i32 @reduce_smin_i8(i32 %arg) {
   %V32  = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
   %V64  = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
   %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
+
+  %V1_vp   = call i8 @llvm.vp.reduce.smin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i8 @llvm.vp.reduce.smin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i8 @llvm.vp.reduce.smin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i8 @llvm.vp.reduce.smin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i8 @llvm.vp.reduce.smin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i8 @llvm.vp.reduce.smin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i8 @llvm.vp.reduce.smin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i8 @llvm.vp.reduce.smin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -252,6 +391,14 @@ define i32 @reduce_smin_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.smin.v128i16(<128 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smin_i16'
@@ -263,6 +410,14 @@ define i32 @reduce_smin_i16(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.smin.v128i16(<128 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.smin.v1i16(<1 x i16> undef)
@@ -273,6 +428,15 @@ define i32 @reduce_smin_i16(i32 %arg) {
   %V32  = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
   %V64  = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
   %V128 = call i16 @llvm.vector.reduce.smin.v128i16(<128 x i16> undef)
+
+  %V1_vp   = call i16 @llvm.vp.reduce.smin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i16 @llvm.vp.reduce.smin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i16 @llvm.vp.reduce.smin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i16 @llvm.vp.reduce.smin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i16 @llvm.vp.reduce.smin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i16 @llvm.vp.reduce.smin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i16 @llvm.vp.reduce.smin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i16 @llvm.vp.reduce.smin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -286,6 +450,14 @@ define i32 @reduce_smin_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.smin.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.smin.v128i32(<128 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smin_i32'
@@ -297,6 +469,14 @@ define i32 @reduce_smin_i32(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.smin.v64i32(<64 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.smin.v128i32(<128 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.smin.v1i32(<1 x i32> undef)
@@ -307,32 +487,19 @@ define i32 @reduce_smin_i32(i32 %arg) {
   %V32  = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
   %V64  = call i32 @llvm.vector.reduce.smin.v64i32(<64 x i32> undef)
   %V128 = call i32 @llvm.vector.reduce.smin.v128i32(<128 x i32> undef)
+
+  %V1_vp   = call i32 @llvm.vp.reduce.smin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i32 @llvm.vp.reduce.smin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i32 @llvm.vp.reduce.smin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i32 @llvm.vp.reduce.smin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i32 @llvm.vp.reduce.smin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i32 @llvm.vp.reduce.smin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i32 @llvm.vp.reduce.smin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i32 @llvm.vp.reduce.smin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
 define i32 @reduce_smin_i64(i32 %arg) {
-; CHECK-LABEL: 'reduce_smin_i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.smin.v32i64(<32 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.smin.v64i64(<64 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.smin.v128i64(<128 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SIZE-LABEL: 'reduce_smin_i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.smin.v32i64(<32 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.smin.v64i64(<64 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.smin.v128i64(<128 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
   %V1   = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
   %V2   = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
   %V4   = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
@@ -341,6 +508,15 @@ define i32 @reduce_smin_i64(i32 %arg) {
   %V32  = call i64 @llvm.vector.reduce.smin.v32i64(<32 x i64> undef)
   %V64  = call i64 @llvm.vector.reduce.smin.v64i64(<64 x i64> undef)
   %V128 = call i64 @llvm.vector.reduce.smin.v128i64(<128 x i64> undef)
+
+  %V1_vp   = call i64 @llvm.vp.reduce.smin.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i64 @llvm.vp.reduce.smin.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i64 @llvm.vp.reduce.smin.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i64 @llvm.vp.reduce.smin.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i64 @llvm.vp.reduce.smin.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i64 @llvm.vp.reduce.smin.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i64 @llvm.vp.reduce.smin.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i64 @llvm.vp.reduce.smin.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
index d75a95f3fadd97..2db77240fc3fcf 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
@@ -1,8 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
 
 define i32 @reduce_i1(i32 %arg) {
 ; CHECK-LABEL: 'reduce_i1'
@@ -17,6 +19,17 @@ define i32 @reduce_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V256 = call i1 @llvm.vector.reduce.or.v256i1(<256 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V512 = call i1 @llvm.vector.reduce.or.v512i1(<512 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V1024 = call i1 @llvm.vector.reduce.or.v1024i1(<1024 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.or.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.or.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.or.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.or.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.or.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.or.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.or.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.or.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14076 for instruction: %V256_vp = call i1 @llvm.vp.reduce.or.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 46584 for instruction: %V512_vp = call i1 @llvm.vp.reduce.or.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 166896 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.or.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i1'
@@ -31,6 +44,17 @@ define i32 @reduce_i1(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V256 = call i1 @llvm.vector.reduce.or.v256i1(<256 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V512 = call i1 @llvm.vector.reduce.or.v512i1(<512 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V1024 = call i1 @llvm.vector.reduce.or.v1024i1(<1024 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.or.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.or.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.or.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.or.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.or.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.or.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.or.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.or.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3324 for instruction: %V256_vp = call i1 @llvm.vp.reduce.or.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10744 for instruction: %V512_vp = call i1 @llvm.vp.reduce.or.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 37872 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.or.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef)
@@ -44,6 +68,18 @@ define i32 @reduce_i1(i32 %arg) {
   %V256 = call i1 @llvm.vector.reduce.or.v256i1(<256 x i1> undef)
   %V512 = call i1 @llvm.vector.reduce.or.v512i1(<512 x i1> undef)
   %V1024 = call i1 @llvm.vector.reduce.or.v1024i1(<1024 x i1> undef)
+
+  %V1_vp   = call i1 @llvm.vp.reduce.or.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i1 @llvm.vp.reduce.or.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i1 @llvm.vp.reduce.or.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i1 @llvm.vp.reduce.or.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i1 @llvm.vp.reduce.or.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i1 @llvm.vp.reduce.or.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i1 @llvm.vp.reduce.or.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i1 @llvm.vp.reduce.or.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+  %V256_vp = call i1 @llvm.vp.reduce.or.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
+  %V512_vp = call i1 @llvm.vp.reduce.or.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
+  %V1024_vp = call i1 @llvm.vp.reduce.or.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -57,6 +93,14 @@ define i32 @reduce_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.or.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.or.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.or.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.or.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.or.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.or.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.or.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.or.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i8'
@@ -68,6 +112,14 @@ define i32 @reduce_i8(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.or.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.or.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.or.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.or.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.or.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.or.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.or.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.or.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> undef)
@@ -78,6 +130,15 @@ define i32 @reduce_i8(i32 %arg) {
   %V32  = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
   %V64  = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef)
   %V128 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> undef)
+
+  %V1_vp   = call i8 @llvm.vp.reduce.or.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i8 @llvm.vp.reduce.or.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i8 @llvm.vp.reduce.or.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i8 @llvm.vp.reduce.or.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i8 @llvm.vp.reduce.or.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i8 @llvm.vp.reduce.or.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i8 @llvm.vp.reduce.or.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i8 @llvm.vp.reduce.or.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -91,6 +152,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.or.v128i16(<128 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.or.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.or.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.or.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.or.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.or.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.or.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.or.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.or.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i16'
@@ -102,6 +171,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.or.v128i16(<128 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.or.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.or.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.or.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.or.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.or.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.or.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.or.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.or.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.or.v1i16(<1 x i16> undef)
@@ -112,6 +189,15 @@ define i32 @reduce_i16(i32 %arg) {
   %V32  = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> undef)
   %V64  = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> undef)
   %V128 = call i16 @llvm.vector.reduce.or.v128i16(<128 x i16> undef)
+
+  %V1_vp   = call i16 @llvm.vp.reduce.or.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i16 @llvm.vp.reduce.or.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i16 @llvm.vp.reduce.or.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i16 @llvm.vp.reduce.or.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i16 @llvm.vp.reduce.or.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i16 @llvm.vp.reduce.or.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i16 @llvm.vp.reduce.or.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i16 @llvm.vp.reduce.or.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -125,6 +211,14 @@ define i32 @reduce_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.or.v128i32(<128 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.or.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.or.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.or.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.or.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.or.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.or.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.or.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.or.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i32'
@@ -136,6 +230,14 @@ define i32 @reduce_i32(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.or.v128i32(<128 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.or.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.or.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.or.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.or.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.or.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.or.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.or.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.or.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.or.v1i32(<1 x i32> undef)
@@ -146,32 +248,19 @@ define i32 @reduce_i32(i32 %arg) {
   %V32  = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> undef)
   %V64  = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> undef)
   %V128 = call i32 @llvm.vector.reduce.or.v128i32(<128 x i32> undef)
+
+  %V1_vp   = call i32 @llvm.vp.reduce.or.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i32 @llvm.vp.reduce.or.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i32 @llvm.vp.reduce.or.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i32 @llvm.vp.reduce.or.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i32 @llvm.vp.reduce.or.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i32 @llvm.vp.reduce.or.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i32 @llvm.vp.reduce.or.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i32 @llvm.vp.reduce.or.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
 define i32 @reduce_i64(i32 %arg) {
-; CHECK-LABEL: 'reduce_i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.or.v32i64(<32 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.or.v64i64(<64 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.or.v128i64(<128 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SIZE-LABEL: 'reduce_i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.or.v32i64(<32 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.or.v64i64(<64 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.or.v128i64(<128 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
   %V1   = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
   %V2   = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
   %V4   = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
@@ -180,6 +269,15 @@ define i32 @reduce_i64(i32 %arg) {
   %V32  = call i64 @llvm.vector.reduce.or.v32i64(<32 x i64> undef)
   %V64  = call i64 @llvm.vector.reduce.or.v64i64(<64 x i64> undef)
   %V128 = call i64 @llvm.vector.reduce.or.v128i64(<128 x i64> undef)
+
+  %V1_vp   = call i64 @llvm.vp.reduce.or.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i64 @llvm.vp.reduce.or.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i64 @llvm.vp.reduce.or.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i64 @llvm.vp.reduce.or.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i64 @llvm.vp.reduce.or.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i64 @llvm.vp.reduce.or.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i64 @llvm.vp.reduce.or.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i64 @llvm.vp.reduce.or.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
index 4f6e0ba074ed81..7f1ff31f594348 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
@@ -1,34 +1,42 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
-; RUN: opt < %s -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
 
 declare half @llvm.vector.reduce.fadd.nxv1f16(half, <vscale x 1 x half>)
 
 define half @vreduce_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv1f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv1f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call reassoc half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
+  %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
   ret half %red
 }
 
 define half @vreduce_ord_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv1f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv1f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
+  %red_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
   ret half %red
 }
 
@@ -37,26 +45,32 @@ declare half @llvm.vector.reduce.fadd.nxv2f16(half, <vscale x 2 x half>)
 define half @vreduce_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv2f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv2f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call reassoc half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
+  %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
   ret half %red
 }
 
 define half @vreduce_ord_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv2f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv2f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
+  %red_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
   ret half %red
 }
 
@@ -65,26 +79,32 @@ declare half @llvm.vector.reduce.fadd.nxv4f16(half, <vscale x 4 x half>)
 define half @vreduce_fadd_nxv4f16(<vscale x 4 x half> %v, half %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv4f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv4f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call reassoc half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
+  %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
   ret half %red
 }
 
 define half @vreduce_ord_fadd_nxv4f16(<vscale x 4 x half> %v, half %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv4f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv4f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
+  %red_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
   ret half %red
 }
 
@@ -93,26 +113,32 @@ declare float @llvm.vector.reduce.fadd.nxv1f32(float, <vscale x 1 x float>)
 define float @vreduce_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv1f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
+  %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
   ret float %red
 }
 
 define float @vreduce_ord_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv1f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
+  %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
   ret float %red
 }
 
@@ -120,15 +146,18 @@ define float @vreduce_fwadd_nxv1f32(<vscale x 1 x half> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_fwadd_nxv1f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
   %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
+  %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
   ret float %red
 }
 
@@ -136,15 +165,18 @@ define float @vreduce_ord_fwadd_nxv1f32(<vscale x 1 x half> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv1f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
   %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
+  %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
   ret float %red
 }
 
@@ -153,26 +185,32 @@ declare float @llvm.vector.reduce.fadd.nxv2f32(float, <vscale x 2 x float>)
 define float @vreduce_fadd_nxv2f32(<vscale x 2 x float> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv2f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv2f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
+  %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
   ret float %red
 }
 
 define float @vreduce_ord_fadd_nxv2f32(<vscale x 2 x float> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv2f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv2f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
+  %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
   ret float %red
 }
 
@@ -180,15 +218,18 @@ define float @vreduce_fwadd_nxv2f32(<vscale x 2 x half> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_fwadd_nxv2f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv2f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
   %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
+  %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
   ret float %red
 }
 
@@ -196,15 +237,18 @@ define float @vreduce_ord_fwadd_nxv2f32(<vscale x 2 x half> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv2f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv2f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
   %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
+  %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
   ret float %red
 }
 
@@ -213,26 +257,32 @@ declare float @llvm.vector.reduce.fadd.nxv4f32(float, <vscale x 4 x float>)
 define float @vreduce_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv4f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv4f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
+  %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
   ret float %red
 }
 
 define float @vreduce_ord_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv4f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv4f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
+  %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
   ret float %red
 }
 
@@ -240,15 +290,18 @@ define float @vreduce_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_fwadd_nxv4f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv4f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
   %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
+  %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
   ret float %red
 }
 
@@ -256,15 +309,18 @@ define float @vreduce_ord_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv4f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv4f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
   %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
+  %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
   ret float %red
 }
 
@@ -273,26 +329,32 @@ declare double @llvm.vector.reduce.fadd.nxv1f64(double, <vscale x 1 x double>)
 define double @vreduce_fadd_nxv1f64(<vscale x 1 x double> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv1f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv1f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
+  %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
   ret double %red
 }
 
 define double @vreduce_ord_fadd_nxv1f64(<vscale x 1 x double> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv1f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv1f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
+  %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
   ret double %red
 }
 
@@ -300,15 +362,18 @@ define double @vreduce_fwadd_nxv1f64(<vscale x 1 x float> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_fwadd_nxv1f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv1f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
   %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
+  %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
   ret double %red
 }
 
@@ -316,15 +381,18 @@ define double @vreduce_ord_fwadd_nxv1f64(<vscale x 1 x float> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv1f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv1f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
   %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
+  %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
   ret double %red
 }
 
@@ -333,26 +401,32 @@ declare double @llvm.vector.reduce.fadd.nxv2f64(double, <vscale x 2 x double>)
 define double @vreduce_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv2f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv2f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
+  %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
   ret double %red
 }
 
 define double @vreduce_ord_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv2f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv2f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
+  %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
   ret double %red
 }
 
@@ -360,15 +434,18 @@ define double @vreduce_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_fwadd_nxv2f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv2f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
   %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
+  %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
   ret double %red
 }
 
@@ -376,15 +453,18 @@ define double @vreduce_ord_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv2f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv2f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
   %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
+  %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
   ret double %red
 }
 
@@ -393,26 +473,32 @@ declare double @llvm.vector.reduce.fadd.nxv4f64(double, <vscale x 4 x double>)
 define double @vreduce_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv4f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv4f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
+  %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
   ret double %red
 }
 
 define double @vreduce_ord_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv4f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv4f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
+  %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
   ret double %red
 }
 
@@ -420,15 +506,18 @@ define double @vreduce_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_fwadd_nxv4f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv4f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
   %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
+  %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
   ret double %red
 }
 
@@ -436,15 +525,18 @@ define double @vreduce_ord_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv4f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv4f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
   %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
+  %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
   ret double %red
 }
 
@@ -453,39 +545,48 @@ declare half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half>)
 define half @vreduce_fmin_nxv1f16(<vscale x 1 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
+  %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
   ret half %red
 }
 
 define half @vreduce_fmin_nxv1f16_nonans(<vscale x 1 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f16_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f16_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
+  %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
   ret half %red
 }
 
 define half @vreduce_fmin_nxv1f16_nonans_noinfs(<vscale x 1 x half> %v) #1 {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f16_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f16_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
+  %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
   ret half %red
 }
 
@@ -494,13 +595,16 @@ declare half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half>)
 define half @vreduce_fmin_nxv2f16(<vscale x 2 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv2f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv2f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %v)
+  %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
   ret half %red
 }
 
@@ -509,13 +613,16 @@ declare half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half>)
 define half @vreduce_fmin_nxv4f16(<vscale x 4 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv4f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv4f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %v)
+  %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
   ret half %red
 }
 
@@ -524,13 +631,16 @@ declare half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half>)
 define half @vreduce_fmin_nxv64f16(<vscale x 64 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv64f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv64f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
+  %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
   ret half %red
 }
 
@@ -539,39 +649,48 @@ declare float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float>)
 define float @vreduce_fmin_nxv1f32(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
+  %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
   ret float %red
 }
 
 define float @vreduce_fmin_nxv1f32_nonans(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f32_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f32_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
+  %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
   ret float %red
 }
 
 define float @vreduce_fmin_nxv1f32_nonans_noinfs(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f32_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f32_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
+  %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
   ret float %red
 }
 
@@ -580,13 +699,16 @@ declare float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float>)
 define float @vreduce_fmin_nxv2f32(<vscale x 2 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv2f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv2f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %v)
+  %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
   ret float %red
 }
 
@@ -595,13 +717,16 @@ declare float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float>)
 define float @vreduce_fmin_nxv4f32(<vscale x 4 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv4f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv4f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
+  %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
   ret float %red
 }
 
@@ -610,13 +735,16 @@ declare float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float>)
 define float @vreduce_fmin_nxv32f32(<vscale x 32 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv32f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv32f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
+  %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
   ret float %red
 }
 
@@ -625,39 +753,48 @@ declare double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double>)
 define double @vreduce_fmin_nxv1f64(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
+  %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
   ret double %red
 }
 
 define double @vreduce_fmin_nxv1f64_nonans(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f64_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f64_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
+  %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
   ret double %red
 }
 
 define double @vreduce_fmin_nxv1f64_nonans_noinfs(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f64_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f64_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
+  %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
   ret double %red
 }
 
@@ -666,13 +803,16 @@ declare double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double>)
 define double @vreduce_fmin_nxv2f64(<vscale x 2 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv2f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv2f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %v)
+  %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
   ret double %red
 }
 
@@ -681,13 +821,16 @@ declare double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double>)
 define double @vreduce_fmin_nxv4f64(<vscale x 4 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv4f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv4f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
+  %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
   ret double %red
 }
 
@@ -696,13 +839,16 @@ declare double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double>)
 define double @vreduce_fmin_nxv16f64(<vscale x 16 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv16f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv16f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
+  %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
   ret double %red
 }
 
@@ -711,39 +857,48 @@ declare half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half>)
 define half @vreduce_fmax_nxv1f16(<vscale x 1 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
+  %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
   ret half %red
 }
 
 define half @vreduce_fmax_nxv1f16_nonans(<vscale x 1 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f16_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f16_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
+  %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
   ret half %red
 }
 
 define half @vreduce_fmax_nxv1f16_nonans_noinfs(<vscale x 1 x half> %v) #1 {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f16_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f16_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
+  %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
   ret half %red
 }
 
@@ -752,13 +907,16 @@ declare half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half>)
 define half @vreduce_fmax_nxv2f16(<vscale x 2 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv2f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv2f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %v)
+  %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
   ret half %red
 }
 
@@ -767,13 +925,16 @@ declare half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half>)
 define half @vreduce_fmax_nxv4f16(<vscale x 4 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv4f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv4f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %v)
+  %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
   ret half %red
 }
 
@@ -782,13 +943,16 @@ declare half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half>)
 define half @vreduce_fmax_nxv64f16(<vscale x 64 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv64f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv64f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
+  %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
   ret half %red
 }
 
@@ -797,39 +961,48 @@ declare float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float>)
 define float @vreduce_fmax_nxv1f32(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
+  %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
   ret float %red
 }
 
 define float @vreduce_fmax_nxv1f32_nonans(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f32_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f32_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
+  %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
   ret float %red
 }
 
 define float @vreduce_fmax_nxv1f32_nonans_noinfs(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f32_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f32_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
+  %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
   ret float %red
 }
 
@@ -838,13 +1011,16 @@ declare float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float>)
 define float @vreduce_fmax_nxv2f32(<vscale x 2 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv2f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv2f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %v)
+  %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
   ret float %red
 }
 
@@ -853,13 +1029,16 @@ declare float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float>)
 define float @vreduce_fmax_nxv4f32(<vscale x 4 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv4f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv4f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
+  %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
   ret float %red
 }
 
@@ -868,13 +1047,16 @@ declare float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float>)
 define float @vreduce_fmax_nxv32f32(<vscale x 32 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv32f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv32f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
+  %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
   ret float %red
 }
 
@@ -883,39 +1065,48 @@ declare double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double>)
 define double @vreduce_fmax_nxv1f64(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
+  %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
   ret double %red
 }
 
 define double @vreduce_fmax_nxv1f64_nonans(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f64_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f64_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
+  %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
   ret double %red
 }
 
 define double @vreduce_fmax_nxv1f64_nonans_noinfs(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f64_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f64_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
+  %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
   ret double %red
 }
 
@@ -924,13 +1115,16 @@ declare double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double>)
 define double @vreduce_fmax_nxv2f64(<vscale x 2 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv2f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv2f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %v)
+  %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
   ret double %red
 }
 
@@ -939,13 +1133,16 @@ declare double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double>)
 define double @vreduce_fmax_nxv4f64(<vscale x 4 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv4f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv4f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
+  %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
   ret double %red
 }
 
@@ -954,25 +1151,31 @@ declare double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double>)
 define double @vreduce_fmax_nxv16f64(<vscale x 16 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv16f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv16f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)
+  %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
   ret double %red
 }
 
 define float @vreduce_nsz_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_nsz_fadd_nxv1f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc nsz float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_nsz_fadd_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc nsz float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
+  %red_vp = call reassoc nsz float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
   ret float %red
 }
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll
index 2807f7526760f8..b565cc9ac3af4a 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll
@@ -1,21 +1,26 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
 
 declare i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8>)
 
 define signext i8 @vreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.add.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -24,13 +29,16 @@ declare i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8>)
 define signext i8 @vreduce_umax_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.umax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -39,13 +47,16 @@ declare i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8>)
 define signext i8 @vreduce_smax_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.smax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -54,13 +65,16 @@ declare i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8>)
 define signext i8 @vreduce_umin_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.umin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -69,13 +83,16 @@ declare i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8>)
 define signext i8 @vreduce_smin_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.smin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -84,13 +101,16 @@ declare i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8>)
 define signext i8 @vreduce_and_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.and.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -99,13 +119,16 @@ declare i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8>)
 define signext i8 @vreduce_or_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.or.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -114,13 +137,16 @@ declare i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8>)
 define signext i8 @vreduce_xor_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.xor.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -129,13 +155,16 @@ declare i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8>)
 define signext i8 @vreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.add.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -144,13 +173,16 @@ declare i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8>)
 define signext i8 @vreduce_umax_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.umax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -159,13 +191,16 @@ declare i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8>)
 define signext i8 @vreduce_smax_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.smax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -174,13 +209,16 @@ declare i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8>)
 define signext i8 @vreduce_umin_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.umin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -189,13 +227,16 @@ declare i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8>)
 define signext i8 @vreduce_smin_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.smin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -204,13 +245,16 @@ declare i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8>)
 define signext i8 @vreduce_and_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.and.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -219,13 +263,16 @@ declare i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8>)
 define signext i8 @vreduce_or_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.or.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -234,13 +281,16 @@ declare i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8>)
 define signext i8 @vreduce_xor_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.xor.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -249,13 +299,16 @@ declare i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8>)
 define signext i8 @vreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -264,13 +317,16 @@ declare i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8>)
 define signext i8 @vreduce_umax_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.umax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -279,13 +335,16 @@ declare i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8>)
 define signext i8 @vreduce_smax_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.smax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -294,13 +353,16 @@ declare i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8>)
 define signext i8 @vreduce_umin_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.umin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -309,13 +371,16 @@ declare i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8>)
 define signext i8 @vreduce_smin_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.smin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -324,13 +389,16 @@ declare i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8>)
 define signext i8 @vreduce_and_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.and.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -339,13 +407,16 @@ declare i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8>)
 define signext i8 @vreduce_or_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.or.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -354,13 +425,16 @@ declare i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8>)
 define signext i8 @vreduce_xor_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.xor.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -369,13 +443,16 @@ declare i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16>)
 define signext i16 @vreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -383,15 +460,18 @@ define signext i16 @vwreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
   %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
+  %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -399,15 +479,18 @@ define signext i16 @vwreduce_uadd_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
   %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
+  %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -416,13 +499,16 @@ declare i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16>)
 define signext i16 @vreduce_umax_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.umax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -431,13 +517,16 @@ declare i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16>)
 define signext i16 @vreduce_smax_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.smax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -446,13 +535,16 @@ declare i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16>)
 define signext i16 @vreduce_umin_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.umin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -461,13 +553,16 @@ declare i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16>)
 define signext i16 @vreduce_smin_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.smin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -476,13 +571,16 @@ declare i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16>)
 define signext i16 @vreduce_and_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.and.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -491,13 +589,16 @@ declare i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16>)
 define signext i16 @vreduce_or_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.or.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -506,13 +607,16 @@ declare i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16>)
 define signext i16 @vreduce_xor_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.xor.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -521,13 +625,16 @@ declare i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16>)
 define signext i16 @vreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -535,15 +642,18 @@ define signext i16 @vwreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
   %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
+  %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -551,15 +661,18 @@ define signext i16 @vwreduce_uadd_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
   %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
+  %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -568,13 +681,16 @@ declare i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16>)
 define signext i16 @vreduce_umax_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.umax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -583,13 +699,16 @@ declare i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16>)
 define signext i16 @vreduce_smax_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.smax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -598,13 +717,16 @@ declare i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16>)
 define signext i16 @vreduce_umin_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.umin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -613,13 +735,16 @@ declare i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16>)
 define signext i16 @vreduce_smin_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.smin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -628,13 +753,16 @@ declare i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16>)
 define signext i16 @vreduce_and_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.and.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -643,13 +771,16 @@ declare i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16>)
 define signext i16 @vreduce_or_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.or.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -658,13 +789,16 @@ declare i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16>)
 define signext i16 @vreduce_xor_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.xor.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -673,13 +807,16 @@ declare i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16>)
 define signext i16 @vreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -687,15 +824,18 @@ define signext i16 @vwreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
   %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
+  %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -703,15 +843,18 @@ define signext i16 @vwreduce_uadd_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
   %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
+  %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -720,13 +863,16 @@ declare i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16>)
 define signext i16 @vreduce_umax_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.umax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -735,13 +881,16 @@ declare i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16>)
 define signext i16 @vreduce_smax_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.smax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -750,13 +899,16 @@ declare i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16>)
 define signext i16 @vreduce_umin_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.umin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -765,13 +917,16 @@ declare i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16>)
 define signext i16 @vreduce_smin_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.smin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -780,13 +935,16 @@ declare i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16>)
 define signext i16 @vreduce_and_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.and.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -795,13 +953,16 @@ declare i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16>)
 define signext i16 @vreduce_or_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.or.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -810,13 +971,16 @@ declare i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16>)
 define signext i16 @vreduce_xor_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.xor.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -825,13 +989,16 @@ declare i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32>)
 define signext i32 @vreduce_add_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -839,15 +1006,18 @@ define signext i32 @vwreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i16> %v to <vscale x 1 x i32>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i16> %v to <vscale x 1 x i32>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = sext <vscale x 1 x i16> %v to <vscale x 1 x i32>
   %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
+  %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -855,15 +1025,18 @@ define signext i32 @vwreduce_uadd_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 1 x i16> %v to <vscale x 1 x i32>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 1 x i16> %v to <vscale x 1 x i32>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = zext <vscale x 1 x i16> %v to <vscale x 1 x i32>
   %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
+  %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -872,13 +1045,16 @@ declare i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32>)
 define signext i32 @vreduce_umax_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.umax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -887,13 +1063,16 @@ declare i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32>)
 define signext i32 @vreduce_smax_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.smax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -902,13 +1081,16 @@ declare i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32>)
 define signext i32 @vreduce_umin_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.umin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -917,13 +1099,16 @@ declare i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32>)
 define signext i32 @vreduce_smin_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.smin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -932,13 +1117,16 @@ declare i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32>)
 define signext i32 @vreduce_and_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.and.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -947,13 +1135,16 @@ declare i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32>)
 define signext i32 @vreduce_or_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.or.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -962,13 +1153,16 @@ declare i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32>)
 define signext i32 @vreduce_xor_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.xor.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -977,13 +1171,16 @@ declare i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32>)
 define signext i32 @vreduce_add_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -991,15 +1188,18 @@ define signext i32 @vwreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i16> %v to <vscale x 2 x i32>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i16> %v to <vscale x 2 x i32>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = sext <vscale x 2 x i16> %v to <vscale x 2 x i32>
   %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
+  %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1007,15 +1207,18 @@ define signext i32 @vwreduce_uadd_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 2 x i16> %v to <vscale x 2 x i32>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 2 x i16> %v to <vscale x 2 x i32>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = zext <vscale x 2 x i16> %v to <vscale x 2 x i32>
   %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
+  %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1024,13 +1227,16 @@ declare i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32>)
 define signext i32 @vreduce_umax_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.umax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1039,13 +1245,16 @@ declare i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32>)
 define signext i32 @vreduce_smax_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.smax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1054,13 +1263,16 @@ declare i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32>)
 define signext i32 @vreduce_umin_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.umin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1069,13 +1281,16 @@ declare i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32>)
 define signext i32 @vreduce_smin_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.smin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1084,13 +1299,16 @@ declare i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32>)
 define signext i32 @vreduce_and_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1099,13 +1317,16 @@ declare i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32>)
 define signext i32 @vreduce_or_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.or.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1114,13 +1335,16 @@ declare i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32>)
 define signext i32 @vreduce_xor_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.xor.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1129,13 +1353,16 @@ declare i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1143,15 +1370,18 @@ define signext i32 @vwreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = sext <vscale x 4 x i16> %v to <vscale x 4 x i32>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i16> %v to <vscale x 4 x i32>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = sext <vscale x 4 x i16> %v to <vscale x 4 x i32>
   %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
+  %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1159,15 +1389,18 @@ define signext i32 @vwreduce_uadd_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = zext <vscale x 4 x i16> %v to <vscale x 4 x i32>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 4 x i16> %v to <vscale x 4 x i32>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = zext <vscale x 4 x i16> %v to <vscale x 4 x i32>
   %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
+  %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1176,13 +1409,16 @@ declare i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_umax_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.umax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1191,13 +1427,16 @@ declare i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_smax_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1206,13 +1445,16 @@ declare i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_umin_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.umin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1221,13 +1463,16 @@ declare i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_smin_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1236,13 +1481,16 @@ declare i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_and_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.and.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1251,13 +1499,16 @@ declare i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_or_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.or.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1266,13 +1517,16 @@ declare i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_xor_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.xor.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1281,13 +1535,16 @@ declare i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64>)
 define i64 @vreduce_add_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv1i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv1i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1295,15 +1552,18 @@ define i64 @vwreduce_add_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i32> %v to <vscale x 1 x i64>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i32> %v to <vscale x 1 x i64>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = sext <vscale x 1 x i32> %v to <vscale x 1 x i64>
   %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
+  %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1311,15 +1571,18 @@ define i64 @vwreduce_uadd_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 1 x i32> %v to <vscale x 1 x i64>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 1 x i32> %v to <vscale x 1 x i64>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = zext <vscale x 1 x i32> %v to <vscale x 1 x i64>
   %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
+  %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1328,13 +1591,16 @@ declare i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64>)
 define i64 @vreduce_umax_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv1i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv1i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.umax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1343,13 +1609,16 @@ declare i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64>)
 define i64 @vreduce_smax_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv1i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv1i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.smax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1358,13 +1627,16 @@ declare i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64>)
 define i64 @vreduce_umin_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv1i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv1i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.umin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1373,13 +1645,16 @@ declare i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64>)
 define i64 @vreduce_smin_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv1i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv1i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.smin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1388,13 +1663,16 @@ declare i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64>)
 define i64 @vreduce_and_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv1i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv1i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.and.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1403,13 +1681,16 @@ declare i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64>)
 define i64 @vreduce_or_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv1i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv1i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.or.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1418,13 +1699,16 @@ declare i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64>)
 define i64 @vreduce_xor_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv1i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv1i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.xor.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1433,13 +1717,16 @@ declare i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64>)
 define i64 @vreduce_add_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv2i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv2i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1447,15 +1734,18 @@ define i64 @vwreduce_add_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
   %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
+  %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1463,15 +1753,18 @@ define i64 @vwreduce_uadd_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
   %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
+  %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1480,13 +1773,16 @@ declare i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64>)
 define i64 @vreduce_umax_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv2i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv2i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.umax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1495,13 +1791,16 @@ declare i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64>)
 define i64 @vreduce_smax_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv2i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv2i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.smax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1510,13 +1809,16 @@ declare i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64>)
 define i64 @vreduce_umin_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv2i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv2i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.umin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1525,13 +1827,16 @@ declare i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64>)
 define i64 @vreduce_smin_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv2i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv2i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.smin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1540,13 +1845,16 @@ declare i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64>)
 define i64 @vreduce_and_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv2i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv2i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.and.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1555,13 +1863,16 @@ declare i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64>)
 define i64 @vreduce_or_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv2i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv2i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.or.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1570,13 +1881,16 @@ declare i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64>)
 define i64 @vreduce_xor_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv2i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv2i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.xor.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1585,13 +1899,16 @@ declare i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64>)
 define i64 @vreduce_add_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv4i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv4i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1599,15 +1916,18 @@ define i64 @vwreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %e = sext <vscale x 4 x i32> %v to <vscale x 4 x i64>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i32> %v to <vscale x 4 x i64>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = sext <vscale x 4 x i32> %v to <vscale x 4 x i64>
   %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
+  %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1615,15 +1935,18 @@ define i64 @vwreduce_uadd_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %e = zext <vscale x 4 x i32> %v to <vscale x 4 x i64>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 4 x i32> %v to <vscale x 4 x i64>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = zext <vscale x 4 x i32> %v to <vscale x 4 x i64>
   %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
+  %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1632,13 +1955,16 @@ declare i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64>)
 define i64 @vreduce_umax_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv4i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv4i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.umax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1647,13 +1973,16 @@ declare i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64>)
 define i64 @vreduce_smax_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv4i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv4i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.smax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1662,13 +1991,16 @@ declare i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64>)
 define i64 @vreduce_umin_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv4i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv4i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.umin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1677,13 +2009,16 @@ declare i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64>)
 define i64 @vreduce_smin_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv4i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv4i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.smin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1692,13 +2027,16 @@ declare i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64>)
 define i64 @vreduce_and_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv4i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv4i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.and.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1707,13 +2045,16 @@ declare i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64>)
 define i64 @vreduce_or_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv4i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv4i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.or.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1722,12 +2063,15 @@ declare i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64>)
 define i64 @vreduce_xor_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv4i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv4i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.xor.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i64 %red
 }
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll
index aa03b02895d5f5..228fa602be0bdb 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll
@@ -1,8 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
 
 define i32 @reduce_i1(i32 %arg) {
 ; CHECK-LABEL: 'reduce_i1'
@@ -14,6 +16,14 @@ define i32 @reduce_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.xor.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.xor.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.xor.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.xor.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.xor.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.xor.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.xor.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.xor.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i1'
@@ -25,6 +35,14 @@ define i32 @reduce_i1(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.xor.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.xor.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.xor.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.xor.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.xor.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.xor.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.xor.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.xor.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
@@ -35,6 +53,15 @@ define i32 @reduce_i1(i32 %arg) {
   %V32  = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
   %V64  = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
   %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
+
+  %V1_vp   = call i1 @llvm.vp.reduce.xor.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i1 @llvm.vp.reduce.xor.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i1 @llvm.vp.reduce.xor.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i1 @llvm.vp.reduce.xor.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i1 @llvm.vp.reduce.xor.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i1 @llvm.vp.reduce.xor.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i1 @llvm.vp.reduce.xor.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i1 @llvm.vp.reduce.xor.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -48,6 +75,14 @@ define i32 @reduce_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.xor.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.xor.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.xor.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.xor.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.xor.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.xor.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.xor.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.xor.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i8'
@@ -59,6 +94,14 @@ define i32 @reduce_i8(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.xor.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.xor.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.xor.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.xor.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.xor.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.xor.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.xor.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.xor.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.xor.v1i8(<1 x i8> undef)
@@ -69,6 +112,15 @@ define i32 @reduce_i8(i32 %arg) {
   %V32  = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
   %V64  = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
   %V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef)
+
+  %V1_vp   = call i8 @llvm.vp.reduce.xor.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i8 @llvm.vp.reduce.xor.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i8 @llvm.vp.reduce.xor.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i8 @llvm.vp.reduce.xor.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i8 @llvm.vp.reduce.xor.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i8 @llvm.vp.reduce.xor.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i8 @llvm.vp.reduce.xor.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i8 @llvm.vp.reduce.xor.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -82,6 +134,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.xor.v128i16(<128 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.xor.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.xor.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.xor.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.xor.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.xor.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.xor.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.xor.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.xor.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i16'
@@ -93,6 +153,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.xor.v128i16(<128 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.xor.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.xor.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.xor.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.xor.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.xor.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.xor.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.xor.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.xor.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.xor.v1i16(<1 x i16> undef)
@@ -103,6 +171,15 @@ define i32 @reduce_i16(i32 %arg) {
   %V32  = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef)
   %V64  = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef)
   %V128 = call i16 @llvm.vector.reduce.xor.v128i16(<128 x i16> undef)
+
+  %V1_vp   = call i16 @llvm.vp.reduce.xor.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i16 @llvm.vp.reduce.xor.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i16 @llvm.vp.reduce.xor.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i16 @llvm.vp.reduce.xor.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i16 @llvm.vp.reduce.xor.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i16 @llvm.vp.reduce.xor.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i16 @llvm.vp.reduce.xor.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i16 @llvm.vp.reduce.xor.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -116,6 +193,14 @@ define i32 @reduce_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.xor.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.xor.v128i32(<128 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.xor.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.xor.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.xor.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.xor.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.xor.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.xor.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.xor.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.xor.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i32'
@@ -127,6 +212,14 @@ define i32 @reduce_i32(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.xor.v64i32(<64 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.xor.v128i32(<128 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.xor.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.xor.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.xor.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.xor.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.xor.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.xor.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.xor.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.xor.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.xor.v1i32(<1 x i32> undef)
@@ -137,32 +230,19 @@ define i32 @reduce_i32(i32 %arg) {
   %V32  = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef)
   %V64  = call i32 @llvm.vector.reduce.xor.v64i32(<64 x i32> undef)
   %V128 = call i32 @llvm.vector.reduce.xor.v128i32(<128 x i32> undef)
+
+  %V1_vp   = call i32 @llvm.vp.reduce.xor.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i32 @llvm.vp.reduce.xor.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i32 @llvm.vp.reduce.xor.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i32 @llvm.vp.reduce.xor.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i32 @llvm.vp.reduce.xor.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i32 @llvm.vp.reduce.xor.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i32 @llvm.vp.reduce.xor.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i32 @llvm.vp.reduce.xor.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
 define i32 @reduce_i64(i32 %arg) {
-; CHECK-LABEL: 'reduce_i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.xor.v32i64(<32 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.xor.v64i64(<64 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.xor.v128i64(<128 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SIZE-LABEL: 'reduce_i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.xor.v32i64(<32 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.xor.v64i64(<64 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.xor.v128i64(<128 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
   %V1   = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
   %V2   = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
   %V4   = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
@@ -171,6 +251,15 @@ define i32 @reduce_i64(i32 %arg) {
   %V32  = call i64 @llvm.vector.reduce.xor.v32i64(<32 x i64> undef)
   %V64  = call i64 @llvm.vector.reduce.xor.v64i64(<64 x i64> undef)
   %V128 = call i64 @llvm.vector.reduce.xor.v128i64(<128 x i64> undef)
+
+  %V1_vp   = call i64 @llvm.vp.reduce.xor.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i64 @llvm.vp.reduce.xor.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i64 @llvm.vp.reduce.xor.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i64 @llvm.vp.reduce.xor.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i64 @llvm.vp.reduce.xor.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i64 @llvm.vp.reduce.xor.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i64 @llvm.vp.reduce.xor.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i64 @llvm.vp.reduce.xor.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 

>From dce33167546da4c857e9d3c2f6fcf437c130a68a Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Tue, 29 Oct 2024 19:14:20 -0700
Subject: [PATCH 3/4] [RISCV][TTI] Implement instruction costs for vp.reduce.*.

---
 .../Target/RISCV/RISCVTargetTransformInfo.cpp |  69 +++
 .../Analysis/CostModel/RISCV/reduce-add.ll    | 166 ++++---
 .../Analysis/CostModel/RISCV/reduce-and.ll    | 174 ++++---
 .../Analysis/CostModel/RISCV/reduce-fadd.ll   | 272 +++++------
 .../CostModel/RISCV/reduce-fmaximum.ll        | 224 ++++-----
 .../CostModel/RISCV/reduce-fminimum.ll        | 112 ++---
 .../Analysis/CostModel/RISCV/reduce-fmul.ll   | 254 +++++-----
 .../Analysis/CostModel/RISCV/reduce-max.ll    | 332 ++++++++-----
 .../Analysis/CostModel/RISCV/reduce-min.ll    | 332 ++++++++-----
 .../Analysis/CostModel/RISCV/reduce-or.ll     | 178 ++++---
 .../CostModel/RISCV/reduce-scalable-fp.ll     | 268 +++++-----
 .../CostModel/RISCV/reduce-scalable-int.ll    | 456 +++++++++---------
 .../Analysis/CostModel/RISCV/reduce-xor.ll    | 166 ++++---
 .../CostModel/RISCV/rvv-intrinsics.ll         |  64 +--
 14 files changed, 1720 insertions(+), 1347 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index bfc3f5b29a2aaa..f843f5588a17f2 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1179,6 +1179,75 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     return getCmpSelInstrCost(Instruction::Select, ICA.getReturnType(),
                               ICA.getArgTypes()[0], CmpInst::BAD_ICMP_PREDICATE,
                               CostKind);
+  case Intrinsic::vp_reduce_add:
+  case Intrinsic::vp_reduce_fadd:
+  case Intrinsic::vp_reduce_mul:
+  case Intrinsic::vp_reduce_fmul:
+  case Intrinsic::vp_reduce_and:
+  case Intrinsic::vp_reduce_or:
+  case Intrinsic::vp_reduce_xor: {
+    unsigned Opcode;
+    switch (ICA.getID()) {
+    case Intrinsic::vp_reduce_add:
+      Opcode = Instruction::Add;
+      break;
+    case Intrinsic::vp_reduce_fadd:
+      Opcode = Instruction::FAdd;
+      break;
+    case Intrinsic::vp_reduce_mul:
+      Opcode = Instruction::Mul;
+      break;
+    case Intrinsic::vp_reduce_fmul:
+      Opcode = Instruction::FMul;
+      break;
+    case Intrinsic::vp_reduce_and:
+      Opcode = Instruction::And;
+      break;
+    case Intrinsic::vp_reduce_or:
+      Opcode = Instruction::Or;
+      break;
+    case Intrinsic::vp_reduce_xor:
+      Opcode = Instruction::Xor;
+      break;
+    }
+    return getArithmeticReductionCost(Opcode,
+                                      cast<VectorType>(ICA.getArgTypes()[1]),
+                                      ICA.getFlags(), CostKind);
+  }
+  case Intrinsic::vp_reduce_smax:
+  case Intrinsic::vp_reduce_smin:
+  case Intrinsic::vp_reduce_umax:
+  case Intrinsic::vp_reduce_umin:
+  case Intrinsic::vp_reduce_fmax:
+  case Intrinsic::vp_reduce_fmaximum:
+  case Intrinsic::vp_reduce_fmin:
+  case Intrinsic::vp_reduce_fminimum: {
+    unsigned IID;
+    switch (ICA.getID()) {
+    case Intrinsic::vp_reduce_smax:
+      IID = Intrinsic::smax;
+      break;
+    case Intrinsic::vp_reduce_smin:
+      IID = Intrinsic::smin;
+      break;
+    case Intrinsic::vp_reduce_umax:
+      IID = Intrinsic::umax;
+      break;
+    case Intrinsic::vp_reduce_umin:
+      IID = Intrinsic::umin;
+      break;
+    case Intrinsic::vp_reduce_fmax:
+    case Intrinsic::vp_reduce_fmaximum:
+      IID = Intrinsic::maximum;
+      break;
+    case Intrinsic::vp_reduce_fmin:
+    case Intrinsic::vp_reduce_fminimum:
+      IID = Intrinsic::minimum;
+      break;
+    }
+    return getMinMaxReductionCost(IID, cast<VectorType>(ICA.getArgTypes()[1]),
+                                  ICA.getFlags(), CostKind);
+  }
   }
 
   if (ST->hasVInstructions() && RetTy->isVectorTy()) {
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
index 1edcdecb923e57..70687da17eb1a5 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
@@ -16,14 +16,14 @@ define i32 @reduce_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.add.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.add.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.add.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.add.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.add.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.add.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.add.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.add.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.add.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.add.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.add.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.add.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.add.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.add.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.add.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.add.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i1'
@@ -35,14 +35,14 @@ define i32 @reduce_i1(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.add.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.add.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.add.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.add.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.add.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.add.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.add.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.add.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.add.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.add.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.add.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.add.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.add.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.add.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.add.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.add.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> undef)
@@ -75,14 +75,14 @@ define i32 @reduce_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.add.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.add.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.add.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.add.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i8 @llvm.vp.reduce.add.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.add.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.add.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128_vp = call i8 @llvm.vp.reduce.add.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i8'
@@ -94,14 +94,14 @@ define i32 @reduce_i8(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.add.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.add.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.add.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.add.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i8 @llvm.vp.reduce.add.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.add.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 @llvm.vp.reduce.add.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i8 @llvm.vp.reduce.add.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
@@ -134,14 +134,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.add.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.add.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.add.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.add.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.add.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.add.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.add.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.add.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.add.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.add.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.add.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i16 @llvm.vp.reduce.add.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i16 @llvm.vp.reduce.add.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i16 @llvm.vp.reduce.add.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i16 @llvm.vp.reduce.add.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call i16 @llvm.vp.reduce.add.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i16'
@@ -153,14 +153,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.add.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.add.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.add.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.add.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.add.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.add.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.add.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.add.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.add.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.add.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.add.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i16 @llvm.vp.reduce.add.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i16 @llvm.vp.reduce.add.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i16 @llvm.vp.reduce.add.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i16 @llvm.vp.reduce.add.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call i16 @llvm.vp.reduce.add.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
@@ -193,14 +193,14 @@ define i32 @reduce_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.add.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.add.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.add.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.add.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.add.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.add.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.add.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.add.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i32 @llvm.vp.reduce.add.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.add.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i32 @llvm.vp.reduce.add.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i32 @llvm.vp.reduce.add.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i32 @llvm.vp.reduce.add.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i32 @llvm.vp.reduce.add.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call i32 @llvm.vp.reduce.add.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call i32 @llvm.vp.reduce.add.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i32'
@@ -212,14 +212,14 @@ define i32 @reduce_i32(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.add.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.add.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.add.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.add.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.add.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.add.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.add.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.add.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i32 @llvm.vp.reduce.add.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.add.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i32 @llvm.vp.reduce.add.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i32 @llvm.vp.reduce.add.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i32 @llvm.vp.reduce.add.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i32 @llvm.vp.reduce.add.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call i32 @llvm.vp.reduce.add.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call i32 @llvm.vp.reduce.add.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef)
@@ -243,6 +243,44 @@ define i32 @reduce_i32(i32 %arg) {
 }
 
 define i32 @reduce_i64(i32 %arg) {
+; CHECK-LABEL: 'reduce_i64'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i64 @llvm.vp.reduce.add.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.add.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i64 @llvm.vp.reduce.add.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i64 @llvm.vp.reduce.add.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i64 @llvm.vp.reduce.add.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32_vp = call i64 @llvm.vp.reduce.add.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call i64 @llvm.vp.reduce.add.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call i64 @llvm.vp.reduce.add.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'reduce_i64'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i64 @llvm.vp.reduce.add.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.add.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i64 @llvm.vp.reduce.add.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i64 @llvm.vp.reduce.add.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i64 @llvm.vp.reduce.add.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32_vp = call i64 @llvm.vp.reduce.add.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call i64 @llvm.vp.reduce.add.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call i64 @llvm.vp.reduce.add.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
   %V1   = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
   %V2   = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
   %V4   = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
index f72298b8060631..76f2bd949c652c 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
@@ -19,17 +19,17 @@ define i32 @reduce_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.and.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.and.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.and.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.and.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.and.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.and.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.and.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.and.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14076 for instruction: %V256_vp = call i1 @llvm.vp.reduce.and.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 46584 for instruction: %V512_vp = call i1 @llvm.vp.reduce.and.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 166896 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.and.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i1 @llvm.vp.reduce.and.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i1 @llvm.vp.reduce.and.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i1 @llvm.vp.reduce.and.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i1 @llvm.vp.reduce.and.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i1 @llvm.vp.reduce.and.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i1 @llvm.vp.reduce.and.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i1 @llvm.vp.reduce.and.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i1 @llvm.vp.reduce.and.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V256_vp = call i1 @llvm.vp.reduce.and.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512_vp = call i1 @llvm.vp.reduce.and.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.and.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i1'
@@ -44,17 +44,17 @@ define i32 @reduce_i1(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.and.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.and.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.and.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.and.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.and.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.and.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.and.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.and.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3324 for instruction: %V256_vp = call i1 @llvm.vp.reduce.and.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 10744 for instruction: %V512_vp = call i1 @llvm.vp.reduce.and.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 37872 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.and.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i1 @llvm.vp.reduce.and.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i1 @llvm.vp.reduce.and.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i1 @llvm.vp.reduce.and.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i1 @llvm.vp.reduce.and.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i1 @llvm.vp.reduce.and.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i1 @llvm.vp.reduce.and.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i1 @llvm.vp.reduce.and.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i1 @llvm.vp.reduce.and.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V256_vp = call i1 @llvm.vp.reduce.and.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512_vp = call i1 @llvm.vp.reduce.and.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.and.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
@@ -93,13 +93,13 @@ define i32 @reduce_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.and.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.and.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.and.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.and.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.and.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.and.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.and.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i8 @llvm.vp.reduce.and.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.and.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i8 @llvm.vp.reduce.and.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i8 @llvm.vp.reduce.and.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.and.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.and.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.and.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i8'
@@ -111,13 +111,13 @@ define i32 @reduce_i8(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.and.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.and.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.and.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.and.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.and.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.and.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.and.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i8 @llvm.vp.reduce.and.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.and.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i8 @llvm.vp.reduce.and.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i8 @llvm.vp.reduce.and.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.and.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.and.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 @llvm.vp.reduce.and.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> undef)
@@ -149,14 +149,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.and.v128i16(<128 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.and.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.and.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.and.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.and.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.and.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.and.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.and.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.and.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.and.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.and.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.and.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i16 @llvm.vp.reduce.and.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i16 @llvm.vp.reduce.and.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i16 @llvm.vp.reduce.and.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i16 @llvm.vp.reduce.and.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call i16 @llvm.vp.reduce.and.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i16'
@@ -168,14 +168,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.and.v128i16(<128 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.and.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.and.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.and.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.and.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.and.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.and.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.and.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.and.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.and.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.and.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.and.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i16 @llvm.vp.reduce.and.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i16 @llvm.vp.reduce.and.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i16 @llvm.vp.reduce.and.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i16 @llvm.vp.reduce.and.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call i16 @llvm.vp.reduce.and.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.and.v1i16(<1 x i16> undef)
@@ -208,14 +208,14 @@ define i32 @reduce_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.and.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.and.v128i32(<128 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.and.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.and.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.and.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.and.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.and.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.and.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.and.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.and.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i32 @llvm.vp.reduce.and.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.and.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i32 @llvm.vp.reduce.and.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i32 @llvm.vp.reduce.and.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i32 @llvm.vp.reduce.and.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i32 @llvm.vp.reduce.and.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call i32 @llvm.vp.reduce.and.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call i32 @llvm.vp.reduce.and.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i32'
@@ -227,14 +227,14 @@ define i32 @reduce_i32(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.and.v64i32(<64 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.and.v128i32(<128 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.and.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.and.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.and.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.and.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.and.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.and.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.and.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.and.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i32 @llvm.vp.reduce.and.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.and.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i32 @llvm.vp.reduce.and.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i32 @llvm.vp.reduce.and.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i32 @llvm.vp.reduce.and.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i32 @llvm.vp.reduce.and.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call i32 @llvm.vp.reduce.and.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call i32 @llvm.vp.reduce.and.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.and.v1i32(<1 x i32> undef)
@@ -258,6 +258,44 @@ define i32 @reduce_i32(i32 %arg) {
 }
 
 define i32 @reduce_i64(i32 %arg) {
+; CHECK-LABEL: 'reduce_i64'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.and.v32i64(<32 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.and.v64i64(<64 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.and.v128i64(<128 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i64 @llvm.vp.reduce.and.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.and.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i64 @llvm.vp.reduce.and.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i64 @llvm.vp.reduce.and.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i64 @llvm.vp.reduce.and.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32_vp = call i64 @llvm.vp.reduce.and.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call i64 @llvm.vp.reduce.and.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call i64 @llvm.vp.reduce.and.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'reduce_i64'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.and.v32i64(<32 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.and.v64i64(<64 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.and.v128i64(<128 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i64 @llvm.vp.reduce.and.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.and.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i64 @llvm.vp.reduce.and.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i64 @llvm.vp.reduce.and.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i64 @llvm.vp.reduce.and.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32_vp = call i64 @llvm.vp.reduce.and.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call i64 @llvm.vp.reduce.and.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call i64 @llvm.vp.reduce.and.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
   %V1   = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
   %V2   = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
   %V4   = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
index 4ab1553afda093..c4f34f1c9d2ce9 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
@@ -13,14 +13,14 @@ define void @reduce_fadd_bfloat() {
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %v32 = call fast bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %V1_vp = call fast bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fadd_bfloat'
@@ -32,14 +32,14 @@ define void @reduce_fadd_bfloat() {
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %v32 = call fast bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %V1_vp = call fast bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0.0, <1 x bfloat> undef)
@@ -72,14 +72,14 @@ define void @reduce_fadd_half() {
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; FP-REDUCE-ZVFHMIN-LABEL: 'reduce_fadd_half'
@@ -91,14 +91,14 @@ define void @reduce_fadd_half() {
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %V1_vp = call fast half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %V2_vp = call fast half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %V4_vp = call fast half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %V8_vp = call fast half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %V16_vp = call fast half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %v32_vp = call fast half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %V64_vp = call fast half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %V128_vp = call fast half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fadd_half'
@@ -110,14 +110,14 @@ define void @reduce_fadd_half() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef)
@@ -150,14 +150,14 @@ define void @reduce_fadd_float() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v32 = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call fast float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call fast float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fadd_float'
@@ -169,14 +169,14 @@ define void @reduce_fadd_float() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32 = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call fast float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call fast float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef)
@@ -209,14 +209,14 @@ define void @reduce_fadd_double() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32 = call fast double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call fast double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call fast double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fadd_double'
@@ -228,14 +228,14 @@ define void @reduce_fadd_double() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32 = call fast double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call fast double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call fast double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef)
@@ -268,14 +268,14 @@ define void @reduce_oredered_fadd_bfloat() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v32 = call bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64 = call bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V128 = call bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2_vp = call bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4_vp = call bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8_vp = call bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16_vp = call bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v32_vp = call bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64_vp = call bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V128_vp = call bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_oredered_fadd_bfloat'
@@ -287,14 +287,14 @@ define void @reduce_oredered_fadd_bfloat() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32 = call bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32_vp = call bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0.0, <1 x bfloat> undef)
@@ -327,14 +327,14 @@ define void @reduce_oredered_fadd_half() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2_vp = call half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4_vp = call half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8_vp = call half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16_vp = call half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v32_vp = call half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64_vp = call half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V128_vp = call half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_oredered_fadd_half'
@@ -346,14 +346,14 @@ define void @reduce_oredered_fadd_half() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32_vp = call half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef)
@@ -386,14 +386,14 @@ define void @reduce_oredered_fadd_float() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v32 = call float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64 = call float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V128 = call float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2_vp = call float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4_vp = call float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8_vp = call float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16_vp = call float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v32_vp = call float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64_vp = call float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V128_vp = call float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_oredered_fadd_float'
@@ -405,14 +405,14 @@ define void @reduce_oredered_fadd_float() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32 = call float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32_vp = call float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef)
@@ -445,14 +445,14 @@ define void @reduce_oredered_fadd_double() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v32 = call double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64 = call double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V128 = call double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %v32_vp = call double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %V64_vp = call double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %V128_vp = call double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2_vp = call double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4_vp = call double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8_vp = call double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16_vp = call double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v32_vp = call double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64_vp = call double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V128_vp = call double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_oredered_fadd_double'
@@ -464,14 +464,14 @@ define void @reduce_oredered_fadd_double() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32 = call double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %v32_vp = call double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %V64_vp = call double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %V128_vp = call double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32_vp = call double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
index dedeb4be67ae82..6e33ccaaed5c7e 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
@@ -22,34 +22,34 @@ define float @reduce_fmaximum_f32(float %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %5 = call fast float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %6 = call fast float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %7 = call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %8 = call fast float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %9 = call fast float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %10 = call fast float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %11 = call fast float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %12 = call fast float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %13 = call fast float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %14 = call fast float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %15 = call float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %16 = call float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %17 = call float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %18 = call float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %19 = call float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %20 = call float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %21 = call float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %22 = call fast float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %23 = call fast float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %24 = call fast float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %25 = call fast float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %26 = call fast float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %27 = call fast float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %28 = call fast float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2_vp = call float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4_vp = call float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8_vp = call float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16_vp = call float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32_vp = call float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V64_vp = call float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V128_vp = call float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call fast float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %9 = call fast float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %10 = call fast float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %11 = call fast float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %12 = call fast float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %13 = call fast float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %14 = call fast float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %15 = call float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %17 = call float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %18 = call float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %19 = call float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %20 = call float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %21 = call float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %22 = call fast float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %23 = call fast float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %24 = call fast float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %25 = call fast float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %26 = call fast float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %27 = call fast float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %28 = call fast float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float undef
 ;
 ; SIZE-LABEL: 'reduce_fmaximum_f32'
@@ -67,34 +67,34 @@ define float @reduce_fmaximum_f32(float %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call fast float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = call fast float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %8 = call fast float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %9 = call fast float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %10 = call fast float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %11 = call fast float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %12 = call fast float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %13 = call fast float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %14 = call fast float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %15 = call float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %16 = call float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %17 = call float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %18 = call float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %19 = call float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %20 = call float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %21 = call float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %22 = call fast float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %23 = call fast float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %24 = call fast float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %25 = call fast float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %26 = call fast float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %27 = call fast float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %28 = call fast float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4_vp = call float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8_vp = call float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16_vp = call float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64_vp = call float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V128_vp = call float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call fast float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = call fast float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %10 = call fast float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %11 = call fast float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %12 = call fast float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call fast float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = call fast float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %15 = call float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %17 = call float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %18 = call float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %19 = call float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %20 = call float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %21 = call float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %22 = call fast float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %23 = call fast float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %24 = call fast float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %25 = call fast float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %26 = call fast float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %27 = call fast float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = call fast float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float undef
 ;
 %V2   = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
@@ -165,34 +165,34 @@ define double @reduce_fmaximum_f64(double %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %4 = call fast double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %5 = call fast double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %6 = call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %7 = call double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %8 = call double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %9 = call double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %10 = call double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %11 = call double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %12 = call double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %13 = call double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %14 = call fast double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %15 = call fast double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %16 = call fast double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %17 = call fast double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %18 = call fast double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %19 = call fast double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %20 = call fast double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %21 = call double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %22 = call double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %23 = call double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %24 = call double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %25 = call double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %26 = call double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %27 = call double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %28 = call fast double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %29 = call fast double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %30 = call fast double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %31 = call fast double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %32 = call fast double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %33 = call fast double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %34 = call fast double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %7 = call double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %8 = call double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %9 = call double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %10 = call double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %11 = call double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %12 = call double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %13 = call double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = call fast double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %15 = call fast double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %16 = call fast double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %17 = call fast double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %18 = call fast double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %19 = call fast double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %20 = call fast double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %21 = call double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %22 = call double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %23 = call double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %24 = call double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %25 = call double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %26 = call double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %27 = call double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = call fast double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %29 = call fast double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %30 = call fast double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %31 = call fast double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %32 = call fast double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %33 = call fast double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %34 = call fast double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double undef
 ;
 ; SIZE-LABEL: 'reduce_fmaximum_f64'
@@ -208,34 +208,34 @@ define double @reduce_fmaximum_f64(double %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = call fast double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call fast double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %7 = call double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %8 = call double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %9 = call double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %10 = call double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %11 = call double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %12 = call double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %13 = call double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %14 = call fast double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %15 = call fast double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %16 = call fast double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %17 = call fast double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %18 = call fast double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %19 = call fast double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %20 = call fast double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %21 = call double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %22 = call double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %23 = call double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %24 = call double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %25 = call double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %26 = call double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %27 = call double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %28 = call fast double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %29 = call fast double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %30 = call fast double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %31 = call fast double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %32 = call fast double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %33 = call fast double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %34 = call fast double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %8 = call double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %9 = call double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %10 = call double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %11 = call double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %12 = call double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %13 = call double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = call fast double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = call fast double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %16 = call fast double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call fast double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = call fast double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %19 = call fast double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %20 = call fast double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %21 = call double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %22 = call double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %23 = call double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %24 = call double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %25 = call double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %26 = call double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %27 = call double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = call fast double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %29 = call fast double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %30 = call fast double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %31 = call fast double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %32 = call fast double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %33 = call fast double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %34 = call fast double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double undef
 ;
 %V2   = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
index 6d51911b4fc408..d8fd6393039282 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
@@ -15,20 +15,20 @@ define float @reduce_fmaximum_f32(float %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fminimum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fminimum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fminimum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fminimum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call float @llvm.vp.reduce.fminimum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call float @llvm.vp.reduce.fminimum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call float @llvm.vp.reduce.fminimum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = call float @llvm.vp.reduce.fmin.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %2 = call float @llvm.vp.reduce.fmin.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %3 = call float @llvm.vp.reduce.fmin.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %4 = call float @llvm.vp.reduce.fmin.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %5 = call float @llvm.vp.reduce.fmin.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %6 = call float @llvm.vp.reduce.fmin.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %7 = call float @llvm.vp.reduce.fmin.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2_vp = call float @llvm.vp.reduce.fminimum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4_vp = call float @llvm.vp.reduce.fminimum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8_vp = call float @llvm.vp.reduce.fminimum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16_vp = call float @llvm.vp.reduce.fminimum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32_vp = call float @llvm.vp.reduce.fminimum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V64_vp = call float @llvm.vp.reduce.fminimum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V128_vp = call float @llvm.vp.reduce.fminimum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %1 = call float @llvm.vp.reduce.fmin.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call float @llvm.vp.reduce.fmin.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %3 = call float @llvm.vp.reduce.fmin.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %4 = call float @llvm.vp.reduce.fmin.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %5 = call float @llvm.vp.reduce.fmin.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %6 = call float @llvm.vp.reduce.fmin.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %7 = call float @llvm.vp.reduce.fmin.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float undef
 ;
 ; SIZE-LABEL: 'reduce_fmaximum_f32'
@@ -39,20 +39,20 @@ define float @reduce_fmaximum_f32(float %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fminimum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fminimum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fminimum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fminimum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call float @llvm.vp.reduce.fminimum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call float @llvm.vp.reduce.fminimum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call float @llvm.vp.reduce.fminimum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = call float @llvm.vp.reduce.fmin.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %2 = call float @llvm.vp.reduce.fmin.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %3 = call float @llvm.vp.reduce.fmin.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %4 = call float @llvm.vp.reduce.fmin.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %5 = call float @llvm.vp.reduce.fmin.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %6 = call float @llvm.vp.reduce.fmin.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %7 = call float @llvm.vp.reduce.fmin.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call float @llvm.vp.reduce.fminimum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4_vp = call float @llvm.vp.reduce.fminimum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8_vp = call float @llvm.vp.reduce.fminimum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16_vp = call float @llvm.vp.reduce.fminimum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call float @llvm.vp.reduce.fminimum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64_vp = call float @llvm.vp.reduce.fminimum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V128_vp = call float @llvm.vp.reduce.fminimum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call float @llvm.vp.reduce.fmin.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call float @llvm.vp.reduce.fmin.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call float @llvm.vp.reduce.fmin.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call float @llvm.vp.reduce.fmin.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call float @llvm.vp.reduce.fmin.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call float @llvm.vp.reduce.fmin.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call float @llvm.vp.reduce.fmin.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float undef
 ;
 %V2   = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
@@ -96,20 +96,20 @@ define double @reduce_fmaximum_f64(double %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fminimum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fminimum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fminimum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fminimum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %V32_vp = call double @llvm.vp.reduce.fminimum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %V64_vp = call double @llvm.vp.reduce.fminimum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %V128_vp = call double @llvm.vp.reduce.fminimum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = call double @llvm.vp.reduce.fmin.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %2 = call double @llvm.vp.reduce.fmin.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %3 = call double @llvm.vp.reduce.fmin.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %4 = call double @llvm.vp.reduce.fmin.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %5 = call double @llvm.vp.reduce.fmin.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %6 = call double @llvm.vp.reduce.fmin.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %7 = call double @llvm.vp.reduce.fmin.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2_vp = call double @llvm.vp.reduce.fminimum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4_vp = call double @llvm.vp.reduce.fminimum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8_vp = call double @llvm.vp.reduce.fminimum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16_vp = call double @llvm.vp.reduce.fminimum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32_vp = call double @llvm.vp.reduce.fminimum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64_vp = call double @llvm.vp.reduce.fminimum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call double @llvm.vp.reduce.fminimum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %1 = call double @llvm.vp.reduce.fmin.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %2 = call double @llvm.vp.reduce.fmin.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %3 = call double @llvm.vp.reduce.fmin.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %4 = call double @llvm.vp.reduce.fmin.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = call double @llvm.vp.reduce.fmin.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = call double @llvm.vp.reduce.fmin.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %7 = call double @llvm.vp.reduce.fmin.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double undef
 ;
 ; SIZE-LABEL: 'reduce_fmaximum_f64'
@@ -119,20 +119,20 @@ define double @reduce_fmaximum_f64(double %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fminimum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fminimum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fminimum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fminimum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %V32_vp = call double @llvm.vp.reduce.fminimum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %V64_vp = call double @llvm.vp.reduce.fminimum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %V128_vp = call double @llvm.vp.reduce.fminimum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = call double @llvm.vp.reduce.fmin.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %2 = call double @llvm.vp.reduce.fmin.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %3 = call double @llvm.vp.reduce.fmin.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %4 = call double @llvm.vp.reduce.fmin.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %5 = call double @llvm.vp.reduce.fmin.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %6 = call double @llvm.vp.reduce.fmin.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %7 = call double @llvm.vp.reduce.fmin.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call double @llvm.vp.reduce.fminimum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4_vp = call double @llvm.vp.reduce.fminimum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8_vp = call double @llvm.vp.reduce.fminimum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16_vp = call double @llvm.vp.reduce.fminimum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call double @llvm.vp.reduce.fminimum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64_vp = call double @llvm.vp.reduce.fminimum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V128_vp = call double @llvm.vp.reduce.fminimum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call double @llvm.vp.reduce.fmin.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call double @llvm.vp.reduce.fmin.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call double @llvm.vp.reduce.fmin.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call double @llvm.vp.reduce.fmin.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call double @llvm.vp.reduce.fmin.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call double @llvm.vp.reduce.fmin.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call double @llvm.vp.reduce.fmin.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double undef
 ;
 %V2   = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll
index a8439b14b28e9f..fc0099d5cfafc7 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll
@@ -14,13 +14,13 @@ define void @reduce_fmul_bfloat() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 541 for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 573 for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 73 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 211 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 541 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 573 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fmul_bfloat'
@@ -33,13 +33,13 @@ define void @reduce_fmul_bfloat() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast bfloat @llvm.vector.reduce.fmul.v1bf16(bfloat 0.0, <1 x bfloat> undef)
@@ -73,13 +73,13 @@ define void @reduce_fmul_half() {
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 541 for instruction: %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 573 for instruction: %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 49 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 151 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 541 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 573 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; FP-REDUCE-ZVFHMIN-LABEL: 'reduce_fmul_half'
@@ -92,13 +92,13 @@ define void @reduce_fmul_half() {
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 541 for instruction: %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 573 for instruction: %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 73 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 211 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 541 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 573 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fmul_half'
@@ -111,13 +111,13 @@ define void @reduce_fmul_half() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast half @llvm.vector.reduce.fmul.v1f16(half 0.0, <1 x half> undef)
@@ -151,13 +151,13 @@ define void @reduce_fmul_float() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 483 for instruction: %V64 = call fast float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 547 for instruction: %V128 = call fast float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast float @llvm.vp.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 121 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 451 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 483 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 547 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fmul_float'
@@ -170,13 +170,13 @@ define void @reduce_fmul_float() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64 = call fast float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V128 = call fast float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast float @llvm.vp.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast float @llvm.vector.reduce.fmul.v1f32(float 0.0, <1 x float> undef)
@@ -210,13 +210,13 @@ define void @reduce_fmul_double() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 457 for instruction: %V64 = call fast double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 585 for instruction: %V128 = call fast double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast double @llvm.vp.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 91 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 361 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 393 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 457 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 585 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fmul_double'
@@ -229,13 +229,13 @@ define void @reduce_fmul_double() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64 = call fast double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V128 = call fast double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast double @llvm.vp.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast double @llvm.vector.reduce.fmul.v1f64(double 0.0, <1 x double> undef)
@@ -268,14 +268,14 @@ define void @reduce_ordered_fmul_bfloat() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32 = call bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %V64 = call bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 510 for instruction: %V128 = call bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8_vp = call bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16_vp = call bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32_vp = call bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %V64_vp = call bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 510 for instruction: %V128_vp = call bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_ordered_fmul_bfloat'
@@ -287,14 +287,14 @@ define void @reduce_ordered_fmul_bfloat() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %v32 = call bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 191 for instruction: %V64 = call bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 382 for instruction: %V128 = call bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2_vp = call bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V8_vp = call bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V16_vp = call bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %v32_vp = call bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 191 for instruction: %V64_vp = call bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 382 for instruction: %V128_vp = call bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call bfloat @llvm.vector.reduce.fmul.v1bf16(bfloat 0.0, <1 x bfloat> undef)
@@ -327,14 +327,14 @@ define void @reduce_ordered_fmul_half() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32 = call half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %V64 = call half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 510 for instruction: %V128 = call half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8_vp = call half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16_vp = call half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32_vp = call half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %V64_vp = call half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 510 for instruction: %V128_vp = call half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_ordered_fmul_half'
@@ -346,14 +346,14 @@ define void @reduce_ordered_fmul_half() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %v32 = call half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 191 for instruction: %V64 = call half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 382 for instruction: %V128 = call half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2_vp = call half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V8_vp = call half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V16_vp = call half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %v32_vp = call half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 191 for instruction: %V64_vp = call half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 382 for instruction: %V128_vp = call half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call half @llvm.vector.reduce.fmul.v1f16(half 0.0, <1 x half> undef)
@@ -386,14 +386,14 @@ define void @reduce_ordered_fmul_float() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32 = call float @llvm.vector.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 254 for instruction: %V64 = call float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 508 for instruction: %V128 = call float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call float @llvm.vp.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call float @llvm.vp.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8_vp = call float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16_vp = call float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32_vp = call float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 254 for instruction: %V64_vp = call float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 508 for instruction: %V128_vp = call float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_ordered_fmul_float'
@@ -405,14 +405,14 @@ define void @reduce_ordered_fmul_float() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %v32 = call float @llvm.vector.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 190 for instruction: %V64 = call float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 380 for instruction: %V128 = call float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call float @llvm.vp.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call float @llvm.vp.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2_vp = call float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V8_vp = call float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V16_vp = call float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %v32_vp = call float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 190 for instruction: %V64_vp = call float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 380 for instruction: %V128_vp = call float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call float @llvm.vector.reduce.fmul.v1f32(float 0.0, <1 x float> undef)
@@ -445,14 +445,14 @@ define void @reduce_ordered_fmul_double() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 126 for instruction: %v32 = call double @llvm.vector.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 252 for instruction: %V64 = call double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 504 for instruction: %V128 = call double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call double @llvm.vp.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %v32_vp = call double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %V64_vp = call double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %V128_vp = call double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call double @llvm.vp.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8_vp = call double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16_vp = call double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 126 for instruction: %v32_vp = call double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 252 for instruction: %V64_vp = call double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 504 for instruction: %V128_vp = call double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_ordered_fmul_double'
@@ -464,14 +464,14 @@ define void @reduce_ordered_fmul_double() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %v32 = call double @llvm.vector.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 188 for instruction: %V64 = call double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 376 for instruction: %V128 = call double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call double @llvm.vp.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %v32_vp = call double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %V64_vp = call double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %V128_vp = call double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call double @llvm.vp.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2_vp = call double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V8_vp = call double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V16_vp = call double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %v32_vp = call double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 188 for instruction: %V64_vp = call double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 376 for instruction: %V128_vp = call double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call double @llvm.vector.reduce.fmul.v1f64(double 0.0, <1 x double> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
index c037eb5bfc17f4..f77e94cd333aa5 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
@@ -16,14 +16,14 @@ define i32 @reduce_umin_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.umax.v32i1(<32 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.umax.v64i1(<64 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.umax.v128i1(<128 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umin_i1'
@@ -35,14 +35,14 @@ define i32 @reduce_umin_i1(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.umax.v32i1(<32 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.umax.v64i1(<64 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.umax.v128i1(<128 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.umax.v1i1(<1 x i1> undef)
@@ -75,14 +75,14 @@ define i32 @reduce_umax_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umax_i8'
@@ -94,14 +94,14 @@ define i32 @reduce_umax_i8(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> undef)
@@ -133,14 +133,14 @@ define i32 @reduce_umax_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umax_i16'
@@ -151,14 +151,14 @@ define i32 @reduce_umax_i16(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.umax.v1i16(<1 x i16> undef)
@@ -190,14 +190,14 @@ define i32 @reduce_umax_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.umax.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.umax.v128i32(<128 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umax_i32'
@@ -209,14 +209,14 @@ define i32 @reduce_umax_i32(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.umax.v64i32(<64 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.umax.v128i32(<128 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.umax.v1i32(<1 x i32> undef)
@@ -240,6 +240,44 @@ define i32 @reduce_umax_i32(i32 %arg) {
 }
 
 define i32 @reduce_umax_i64(i32 %arg) {
+; CHECK-LABEL: 'reduce_umax_i64'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.umax.v32i64(<32 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.umax.v64i64(<64 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.umax.v128i64(<128 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i64 @llvm.vp.reduce.umax.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.umax.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i64 @llvm.vp.reduce.umax.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i64 @llvm.vp.reduce.umax.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i64 @llvm.vp.reduce.umax.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32_vp = call i64 @llvm.vp.reduce.umax.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call i64 @llvm.vp.reduce.umax.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call i64 @llvm.vp.reduce.umax.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'reduce_umax_i64'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.umax.v32i64(<32 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.umax.v64i64(<64 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.umax.v128i64(<128 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i64 @llvm.vp.reduce.umax.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.umax.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i64 @llvm.vp.reduce.umax.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i64 @llvm.vp.reduce.umax.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i64 @llvm.vp.reduce.umax.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32_vp = call i64 @llvm.vp.reduce.umax.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call i64 @llvm.vp.reduce.umax.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call i64 @llvm.vp.reduce.umax.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
   %V1   = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
   %V2   = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
   %V4   = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
@@ -270,14 +308,14 @@ define i32 @reduce_smin_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.smax.v32i1(<32 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.smax.v64i1(<64 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.smax.v128i1(<128 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smin_i1'
@@ -289,14 +327,14 @@ define i32 @reduce_smin_i1(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.smax.v32i1(<32 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.smax.v64i1(<64 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.smax.v128i1(<128 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> undef)
@@ -329,14 +367,14 @@ define i32 @reduce_smax_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smax_i8'
@@ -348,14 +386,14 @@ define i32 @reduce_smax_i8(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.smax.v1i8(<1 x i8> undef)
@@ -388,14 +426,14 @@ define i32 @reduce_smax_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smax_i16'
@@ -407,14 +445,14 @@ define i32 @reduce_smax_i16(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.smax.v1i16(<1 x i16> undef)
@@ -447,14 +485,14 @@ define i32 @reduce_smax_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smax_i32'
@@ -466,14 +504,14 @@ define i32 @reduce_smax_i32(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.smax.v1i32(<1 x i32> undef)
@@ -497,6 +535,44 @@ define i32 @reduce_smax_i32(i32 %arg) {
 }
 
 define i32 @reduce_smax_i64(i32 %arg) {
+; CHECK-LABEL: 'reduce_smax_i64'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.smax.v128i64(<128 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i64 @llvm.vp.reduce.smax.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.smax.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i64 @llvm.vp.reduce.smax.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i64 @llvm.vp.reduce.smax.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i64 @llvm.vp.reduce.smax.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32_vp = call i64 @llvm.vp.reduce.smax.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call i64 @llvm.vp.reduce.smax.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call i64 @llvm.vp.reduce.smax.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'reduce_smax_i64'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.smax.v128i64(<128 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i64 @llvm.vp.reduce.smax.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.smax.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i64 @llvm.vp.reduce.smax.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i64 @llvm.vp.reduce.smax.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i64 @llvm.vp.reduce.smax.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32_vp = call i64 @llvm.vp.reduce.smax.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call i64 @llvm.vp.reduce.smax.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call i64 @llvm.vp.reduce.smax.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
   %V1   = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
   %V2   = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
   %V4   = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
index 858c053f0c3ef3..3e6a19e86a904a 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
@@ -16,14 +16,14 @@ define i32 @reduce_umin_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.umin.v32i1(<32 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.umin.v64i1(<64 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.umin.v128i1(<128 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umin_i1'
@@ -35,14 +35,14 @@ define i32 @reduce_umin_i1(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.umin.v32i1(<32 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.umin.v64i1(<64 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.umin.v128i1(<128 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.umin.v1i1(<1 x i1> undef)
@@ -75,14 +75,14 @@ define i32 @reduce_umin_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umin_i8'
@@ -94,14 +94,14 @@ define i32 @reduce_umin_i8(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.umin.v1i8(<1 x i8> undef)
@@ -134,14 +134,14 @@ define i32 @reduce_umin_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.umin.v128i16(<128 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umin_i16'
@@ -153,14 +153,14 @@ define i32 @reduce_umin_i16(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.umin.v128i16(<128 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.umin.v1i16(<1 x i16> undef)
@@ -193,14 +193,14 @@ define i32 @reduce_umin_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.umin.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.umin.v128i32(<128 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umin_i32'
@@ -212,14 +212,14 @@ define i32 @reduce_umin_i32(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.umin.v64i32(<64 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.umin.v128i32(<128 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.umin.v1i32(<1 x i32> undef)
@@ -243,6 +243,44 @@ define i32 @reduce_umin_i32(i32 %arg) {
 }
 
 define i32 @reduce_umin_i64(i32 %arg) {
+; CHECK-LABEL: 'reduce_umin_i64'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.umin.v32i64(<32 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.umin.v64i64(<64 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.umin.v128i64(<128 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i64 @llvm.vp.reduce.umin.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.umin.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i64 @llvm.vp.reduce.umin.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i64 @llvm.vp.reduce.umin.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i64 @llvm.vp.reduce.umin.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32_vp = call i64 @llvm.vp.reduce.umin.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call i64 @llvm.vp.reduce.umin.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call i64 @llvm.vp.reduce.umin.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'reduce_umin_i64'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.umin.v32i64(<32 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.umin.v64i64(<64 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.umin.v128i64(<128 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i64 @llvm.vp.reduce.umin.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.umin.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i64 @llvm.vp.reduce.umin.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i64 @llvm.vp.reduce.umin.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i64 @llvm.vp.reduce.umin.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32_vp = call i64 @llvm.vp.reduce.umin.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call i64 @llvm.vp.reduce.umin.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call i64 @llvm.vp.reduce.umin.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
   %V1   = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
   %V2   = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
   %V4   = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
@@ -273,14 +311,14 @@ define i32 @reduce_smin_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.smin.v32i1(<32 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.smin.v64i1(<64 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.smin.v128i1(<128 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smin_i1'
@@ -292,14 +330,14 @@ define i32 @reduce_smin_i1(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.smin.v32i1(<32 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.smin.v64i1(<64 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.smin.v128i1(<128 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.smin.v1i1(<1 x i1> undef)
@@ -332,14 +370,14 @@ define i32 @reduce_smin_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smin_i8'
@@ -351,14 +389,14 @@ define i32 @reduce_smin_i8(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.smin.v1i8(<1 x i8> undef)
@@ -391,14 +429,14 @@ define i32 @reduce_smin_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.smin.v128i16(<128 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smin_i16'
@@ -410,14 +448,14 @@ define i32 @reduce_smin_i16(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.smin.v128i16(<128 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.smin.v1i16(<1 x i16> undef)
@@ -450,14 +488,14 @@ define i32 @reduce_smin_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.smin.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.smin.v128i32(<128 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smin_i32'
@@ -469,14 +507,14 @@ define i32 @reduce_smin_i32(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.smin.v64i32(<64 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.smin.v128i32(<128 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.smin.v1i32(<1 x i32> undef)
@@ -500,6 +538,44 @@ define i32 @reduce_smin_i32(i32 %arg) {
 }
 
 define i32 @reduce_smin_i64(i32 %arg) {
+; CHECK-LABEL: 'reduce_smin_i64'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.smin.v32i64(<32 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.smin.v64i64(<64 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.smin.v128i64(<128 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i64 @llvm.vp.reduce.smin.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.smin.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i64 @llvm.vp.reduce.smin.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i64 @llvm.vp.reduce.smin.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i64 @llvm.vp.reduce.smin.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32_vp = call i64 @llvm.vp.reduce.smin.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call i64 @llvm.vp.reduce.smin.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call i64 @llvm.vp.reduce.smin.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'reduce_smin_i64'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.smin.v32i64(<32 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.smin.v64i64(<64 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.smin.v128i64(<128 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i64 @llvm.vp.reduce.smin.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.smin.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i64 @llvm.vp.reduce.smin.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i64 @llvm.vp.reduce.smin.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i64 @llvm.vp.reduce.smin.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32_vp = call i64 @llvm.vp.reduce.smin.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call i64 @llvm.vp.reduce.smin.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call i64 @llvm.vp.reduce.smin.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
   %V1   = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
   %V2   = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
   %V4   = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
index 2db77240fc3fcf..69805e79641011 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
@@ -19,17 +19,17 @@ define i32 @reduce_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V256 = call i1 @llvm.vector.reduce.or.v256i1(<256 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V512 = call i1 @llvm.vector.reduce.or.v512i1(<512 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V1024 = call i1 @llvm.vector.reduce.or.v1024i1(<1024 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.or.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.or.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.or.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.or.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.or.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.or.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.or.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.or.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14076 for instruction: %V256_vp = call i1 @llvm.vp.reduce.or.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 46584 for instruction: %V512_vp = call i1 @llvm.vp.reduce.or.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 166896 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.or.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.or.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.or.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.or.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.or.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.or.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.or.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.or.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.or.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V256_vp = call i1 @llvm.vp.reduce.or.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V512_vp = call i1 @llvm.vp.reduce.or.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.or.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i1'
@@ -44,17 +44,17 @@ define i32 @reduce_i1(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V256 = call i1 @llvm.vector.reduce.or.v256i1(<256 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V512 = call i1 @llvm.vector.reduce.or.v512i1(<512 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V1024 = call i1 @llvm.vector.reduce.or.v1024i1(<1024 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.or.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.or.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.or.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.or.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.or.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.or.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.or.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.or.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3324 for instruction: %V256_vp = call i1 @llvm.vp.reduce.or.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 10744 for instruction: %V512_vp = call i1 @llvm.vp.reduce.or.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 37872 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.or.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.or.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.or.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.or.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.or.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.or.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.or.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.or.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.or.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V256_vp = call i1 @llvm.vp.reduce.or.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V512_vp = call i1 @llvm.vp.reduce.or.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.or.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef)
@@ -93,14 +93,14 @@ define i32 @reduce_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.or.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.or.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.or.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.or.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.or.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.or.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.or.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.or.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i8 @llvm.vp.reduce.or.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.or.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i8 @llvm.vp.reduce.or.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i8 @llvm.vp.reduce.or.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.or.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.or.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.or.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128_vp = call i8 @llvm.vp.reduce.or.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i8'
@@ -112,14 +112,14 @@ define i32 @reduce_i8(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.or.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.or.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.or.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.or.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.or.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.or.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.or.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.or.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i8 @llvm.vp.reduce.or.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.or.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i8 @llvm.vp.reduce.or.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i8 @llvm.vp.reduce.or.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.or.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.or.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 @llvm.vp.reduce.or.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i8 @llvm.vp.reduce.or.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> undef)
@@ -152,14 +152,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.or.v128i16(<128 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.or.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.or.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.or.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.or.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.or.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.or.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.or.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.or.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.or.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.or.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.or.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i16 @llvm.vp.reduce.or.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i16 @llvm.vp.reduce.or.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i16 @llvm.vp.reduce.or.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i16 @llvm.vp.reduce.or.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call i16 @llvm.vp.reduce.or.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i16'
@@ -171,14 +171,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.or.v128i16(<128 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.or.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.or.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.or.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.or.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.or.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.or.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.or.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.or.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.or.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.or.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.or.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i16 @llvm.vp.reduce.or.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i16 @llvm.vp.reduce.or.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i16 @llvm.vp.reduce.or.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i16 @llvm.vp.reduce.or.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call i16 @llvm.vp.reduce.or.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.or.v1i16(<1 x i16> undef)
@@ -211,14 +211,14 @@ define i32 @reduce_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.or.v128i32(<128 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.or.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.or.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.or.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.or.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.or.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.or.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.or.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.or.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i32 @llvm.vp.reduce.or.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.or.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i32 @llvm.vp.reduce.or.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i32 @llvm.vp.reduce.or.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i32 @llvm.vp.reduce.or.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i32 @llvm.vp.reduce.or.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call i32 @llvm.vp.reduce.or.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call i32 @llvm.vp.reduce.or.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i32'
@@ -230,14 +230,14 @@ define i32 @reduce_i32(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.or.v128i32(<128 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.or.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.or.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.or.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.or.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.or.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.or.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.or.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.or.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i32 @llvm.vp.reduce.or.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.or.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i32 @llvm.vp.reduce.or.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i32 @llvm.vp.reduce.or.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i32 @llvm.vp.reduce.or.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i32 @llvm.vp.reduce.or.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call i32 @llvm.vp.reduce.or.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call i32 @llvm.vp.reduce.or.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.or.v1i32(<1 x i32> undef)
@@ -261,6 +261,44 @@ define i32 @reduce_i32(i32 %arg) {
 }
 
 define i32 @reduce_i64(i32 %arg) {
+; CHECK-LABEL: 'reduce_i64'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.or.v32i64(<32 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.or.v64i64(<64 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.or.v128i64(<128 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i64 @llvm.vp.reduce.or.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.or.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i64 @llvm.vp.reduce.or.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i64 @llvm.vp.reduce.or.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i64 @llvm.vp.reduce.or.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32_vp = call i64 @llvm.vp.reduce.or.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call i64 @llvm.vp.reduce.or.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call i64 @llvm.vp.reduce.or.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'reduce_i64'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.or.v32i64(<32 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.or.v64i64(<64 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.or.v128i64(<128 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i64 @llvm.vp.reduce.or.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.or.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i64 @llvm.vp.reduce.or.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i64 @llvm.vp.reduce.or.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i64 @llvm.vp.reduce.or.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32_vp = call i64 @llvm.vp.reduce.or.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call i64 @llvm.vp.reduce.or.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call i64 @llvm.vp.reduce.or.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
   %V1   = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
   %V2   = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
   %V4   = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
index 7f1ff31f594348..134f75d6b4692d 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
@@ -11,12 +11,12 @@ declare half @llvm.vector.reduce.fadd.nxv1f16(half, <vscale x 1 x half>)
 define half @vreduce_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv1f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv1f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call reassoc half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
@@ -27,12 +27,12 @@ define half @vreduce_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
 define half @vreduce_ord_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv1f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv1f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
@@ -45,12 +45,12 @@ declare half @llvm.vector.reduce.fadd.nxv2f16(half, <vscale x 2 x half>)
 define half @vreduce_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv2f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv2f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call reassoc half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
@@ -61,12 +61,12 @@ define half @vreduce_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
 define half @vreduce_ord_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv2f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv2f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
@@ -79,12 +79,12 @@ declare half @llvm.vector.reduce.fadd.nxv4f16(half, <vscale x 4 x half>)
 define half @vreduce_fadd_nxv4f16(<vscale x 4 x half> %v, half %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv4f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv4f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call reassoc half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
@@ -95,12 +95,12 @@ define half @vreduce_fadd_nxv4f16(<vscale x 4 x half> %v, half %s) {
 define half @vreduce_ord_fadd_nxv4f16(<vscale x 4 x half> %v, half %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv4f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv4f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
@@ -113,12 +113,12 @@ declare float @llvm.vector.reduce.fadd.nxv1f32(float, <vscale x 1 x float>)
 define float @vreduce_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv1f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
@@ -129,12 +129,12 @@ define float @vreduce_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
 define float @vreduce_ord_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv1f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
@@ -146,13 +146,13 @@ define float @vreduce_fwadd_nxv1f32(<vscale x 1 x half> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_fwadd_nxv1f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
@@ -165,13 +165,13 @@ define float @vreduce_ord_fwadd_nxv1f32(<vscale x 1 x half> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv1f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
@@ -185,12 +185,12 @@ declare float @llvm.vector.reduce.fadd.nxv2f32(float, <vscale x 2 x float>)
 define float @vreduce_fadd_nxv2f32(<vscale x 2 x float> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv2f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv2f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
@@ -201,12 +201,12 @@ define float @vreduce_fadd_nxv2f32(<vscale x 2 x float> %v, float %s) {
 define float @vreduce_ord_fadd_nxv2f32(<vscale x 2 x float> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv2f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv2f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
@@ -218,13 +218,13 @@ define float @vreduce_fwadd_nxv2f32(<vscale x 2 x half> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_fwadd_nxv2f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv2f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
@@ -237,13 +237,13 @@ define float @vreduce_ord_fwadd_nxv2f32(<vscale x 2 x half> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv2f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv2f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
@@ -257,12 +257,12 @@ declare float @llvm.vector.reduce.fadd.nxv4f32(float, <vscale x 4 x float>)
 define float @vreduce_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv4f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv4f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
@@ -273,12 +273,12 @@ define float @vreduce_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
 define float @vreduce_ord_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv4f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv4f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
@@ -290,13 +290,13 @@ define float @vreduce_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_fwadd_nxv4f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv4f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
@@ -309,13 +309,13 @@ define float @vreduce_ord_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv4f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv4f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
@@ -329,12 +329,12 @@ declare double @llvm.vector.reduce.fadd.nxv1f64(double, <vscale x 1 x double>)
 define double @vreduce_fadd_nxv1f64(<vscale x 1 x double> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv1f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv1f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
@@ -345,12 +345,12 @@ define double @vreduce_fadd_nxv1f64(<vscale x 1 x double> %v, double %s) {
 define double @vreduce_ord_fadd_nxv1f64(<vscale x 1 x double> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv1f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv1f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
@@ -362,13 +362,13 @@ define double @vreduce_fwadd_nxv1f64(<vscale x 1 x float> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_fwadd_nxv1f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv1f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
@@ -381,13 +381,13 @@ define double @vreduce_ord_fwadd_nxv1f64(<vscale x 1 x float> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv1f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv1f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
@@ -401,12 +401,12 @@ declare double @llvm.vector.reduce.fadd.nxv2f64(double, <vscale x 2 x double>)
 define double @vreduce_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv2f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv2f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
@@ -417,12 +417,12 @@ define double @vreduce_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
 define double @vreduce_ord_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv2f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv2f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
@@ -434,13 +434,13 @@ define double @vreduce_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_fwadd_nxv2f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv2f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
@@ -453,13 +453,13 @@ define double @vreduce_ord_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv2f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv2f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
@@ -473,12 +473,12 @@ declare double @llvm.vector.reduce.fadd.nxv4f64(double, <vscale x 4 x double>)
 define double @vreduce_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv4f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv4f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
@@ -489,12 +489,12 @@ define double @vreduce_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
 define double @vreduce_ord_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv4f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv4f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
@@ -506,13 +506,13 @@ define double @vreduce_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_fwadd_nxv4f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv4f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
@@ -525,13 +525,13 @@ define double @vreduce_ord_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv4f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv4f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
@@ -545,12 +545,12 @@ declare half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half>)
 define half @vreduce_fmin_nxv1f16(<vscale x 1 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
@@ -561,12 +561,12 @@ define half @vreduce_fmin_nxv1f16(<vscale x 1 x half> %v) {
 define half @vreduce_fmin_nxv1f16_nonans(<vscale x 1 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f16_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f16_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
@@ -577,12 +577,12 @@ define half @vreduce_fmin_nxv1f16_nonans(<vscale x 1 x half> %v) {
 define half @vreduce_fmin_nxv1f16_nonans_noinfs(<vscale x 1 x half> %v) #1 {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f16_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f16_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
@@ -595,12 +595,12 @@ declare half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half>)
 define half @vreduce_fmin_nxv2f16(<vscale x 2 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv2f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv2f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %v)
@@ -613,12 +613,12 @@ declare half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half>)
 define half @vreduce_fmin_nxv4f16(<vscale x 4 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv4f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv4f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %v)
@@ -631,12 +631,12 @@ declare half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half>)
 define half @vreduce_fmin_nxv64f16(<vscale x 64 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv64f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv64f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
@@ -649,12 +649,12 @@ declare float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float>)
 define float @vreduce_fmin_nxv1f32(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
@@ -665,12 +665,12 @@ define float @vreduce_fmin_nxv1f32(<vscale x 1 x float> %v) {
 define float @vreduce_fmin_nxv1f32_nonans(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f32_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f32_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
@@ -681,12 +681,12 @@ define float @vreduce_fmin_nxv1f32_nonans(<vscale x 1 x float> %v) {
 define float @vreduce_fmin_nxv1f32_nonans_noinfs(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f32_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f32_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
@@ -699,12 +699,12 @@ declare float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float>)
 define float @vreduce_fmin_nxv2f32(<vscale x 2 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv2f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv2f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %v)
@@ -717,12 +717,12 @@ declare float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float>)
 define float @vreduce_fmin_nxv4f32(<vscale x 4 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv4f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv4f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
@@ -735,12 +735,12 @@ declare float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float>)
 define float @vreduce_fmin_nxv32f32(<vscale x 32 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv32f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv32f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
@@ -753,12 +753,12 @@ declare double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double>)
 define double @vreduce_fmin_nxv1f64(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
@@ -769,12 +769,12 @@ define double @vreduce_fmin_nxv1f64(<vscale x 1 x double> %v) {
 define double @vreduce_fmin_nxv1f64_nonans(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f64_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f64_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
@@ -785,12 +785,12 @@ define double @vreduce_fmin_nxv1f64_nonans(<vscale x 1 x double> %v) {
 define double @vreduce_fmin_nxv1f64_nonans_noinfs(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f64_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f64_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
@@ -803,12 +803,12 @@ declare double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double>)
 define double @vreduce_fmin_nxv2f64(<vscale x 2 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv2f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv2f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %v)
@@ -821,12 +821,12 @@ declare double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double>)
 define double @vreduce_fmin_nxv4f64(<vscale x 4 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv4f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv4f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
@@ -839,12 +839,12 @@ declare double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double>)
 define double @vreduce_fmin_nxv16f64(<vscale x 16 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv16f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv16f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
@@ -857,12 +857,12 @@ declare half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half>)
 define half @vreduce_fmax_nxv1f16(<vscale x 1 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
@@ -873,12 +873,12 @@ define half @vreduce_fmax_nxv1f16(<vscale x 1 x half> %v) {
 define half @vreduce_fmax_nxv1f16_nonans(<vscale x 1 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f16_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f16_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
@@ -889,12 +889,12 @@ define half @vreduce_fmax_nxv1f16_nonans(<vscale x 1 x half> %v) {
 define half @vreduce_fmax_nxv1f16_nonans_noinfs(<vscale x 1 x half> %v) #1 {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f16_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f16_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
@@ -907,12 +907,12 @@ declare half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half>)
 define half @vreduce_fmax_nxv2f16(<vscale x 2 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv2f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv2f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %v)
@@ -925,12 +925,12 @@ declare half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half>)
 define half @vreduce_fmax_nxv4f16(<vscale x 4 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv4f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv4f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %v)
@@ -943,12 +943,12 @@ declare half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half>)
 define half @vreduce_fmax_nxv64f16(<vscale x 64 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv64f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv64f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
@@ -961,12 +961,12 @@ declare float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float>)
 define float @vreduce_fmax_nxv1f32(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
@@ -977,12 +977,12 @@ define float @vreduce_fmax_nxv1f32(<vscale x 1 x float> %v) {
 define float @vreduce_fmax_nxv1f32_nonans(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f32_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f32_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
@@ -993,12 +993,12 @@ define float @vreduce_fmax_nxv1f32_nonans(<vscale x 1 x float> %v) {
 define float @vreduce_fmax_nxv1f32_nonans_noinfs(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f32_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f32_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
@@ -1011,12 +1011,12 @@ declare float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float>)
 define float @vreduce_fmax_nxv2f32(<vscale x 2 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv2f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv2f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %v)
@@ -1029,12 +1029,12 @@ declare float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float>)
 define float @vreduce_fmax_nxv4f32(<vscale x 4 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv4f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv4f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
@@ -1047,12 +1047,12 @@ declare float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float>)
 define float @vreduce_fmax_nxv32f32(<vscale x 32 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv32f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv32f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
@@ -1065,12 +1065,12 @@ declare double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double>)
 define double @vreduce_fmax_nxv1f64(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
@@ -1081,12 +1081,12 @@ define double @vreduce_fmax_nxv1f64(<vscale x 1 x double> %v) {
 define double @vreduce_fmax_nxv1f64_nonans(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f64_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f64_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
@@ -1097,12 +1097,12 @@ define double @vreduce_fmax_nxv1f64_nonans(<vscale x 1 x double> %v) {
 define double @vreduce_fmax_nxv1f64_nonans_noinfs(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f64_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f64_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
@@ -1115,12 +1115,12 @@ declare double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double>)
 define double @vreduce_fmax_nxv2f64(<vscale x 2 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv2f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv2f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %v)
@@ -1133,12 +1133,12 @@ declare double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double>)
 define double @vreduce_fmax_nxv4f64(<vscale x 4 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv4f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv4f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
@@ -1151,12 +1151,12 @@ declare double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double>)
 define double @vreduce_fmax_nxv16f64(<vscale x 16 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv16f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv16f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)
@@ -1167,12 +1167,12 @@ define double @vreduce_fmax_nxv16f64(<vscale x 16 x double> %v) {
 define float @vreduce_nsz_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_nsz_fadd_nxv1f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc nsz float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc nsz float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_nsz_fadd_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc nsz float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc nsz float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll
index b565cc9ac3af4a..6cd817a93552b7 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll
@@ -11,12 +11,12 @@ declare i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8>)
 define signext i8 @vreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
@@ -29,12 +29,12 @@ declare i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8>)
 define signext i8 @vreduce_umax_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8> %v)
@@ -47,12 +47,12 @@ declare i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8>)
 define signext i8 @vreduce_smax_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> %v)
@@ -65,12 +65,12 @@ declare i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8>)
 define signext i8 @vreduce_umin_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8> %v)
@@ -83,12 +83,12 @@ declare i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8>)
 define signext i8 @vreduce_smin_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8> %v)
@@ -101,12 +101,12 @@ declare i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8>)
 define signext i8 @vreduce_and_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8> %v)
@@ -119,12 +119,12 @@ declare i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8>)
 define signext i8 @vreduce_or_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> %v)
@@ -137,12 +137,12 @@ declare i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8>)
 define signext i8 @vreduce_xor_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8> %v)
@@ -155,12 +155,12 @@ declare i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8>)
 define signext i8 @vreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %v)
@@ -173,12 +173,12 @@ declare i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8>)
 define signext i8 @vreduce_umax_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8> %v)
@@ -191,12 +191,12 @@ declare i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8>)
 define signext i8 @vreduce_smax_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> %v)
@@ -209,12 +209,12 @@ declare i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8>)
 define signext i8 @vreduce_umin_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8> %v)
@@ -227,12 +227,12 @@ declare i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8>)
 define signext i8 @vreduce_smin_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8> %v)
@@ -245,12 +245,12 @@ declare i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8>)
 define signext i8 @vreduce_and_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8> %v)
@@ -263,12 +263,12 @@ declare i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8>)
 define signext i8 @vreduce_or_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> %v)
@@ -281,12 +281,12 @@ declare i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8>)
 define signext i8 @vreduce_xor_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> %v)
@@ -299,12 +299,12 @@ declare i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8>)
 define signext i8 @vreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %v)
@@ -317,12 +317,12 @@ declare i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8>)
 define signext i8 @vreduce_umax_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8> %v)
@@ -335,12 +335,12 @@ declare i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8>)
 define signext i8 @vreduce_smax_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8> %v)
@@ -353,12 +353,12 @@ declare i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8>)
 define signext i8 @vreduce_umin_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8> %v)
@@ -371,12 +371,12 @@ declare i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8>)
 define signext i8 @vreduce_smin_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %v)
@@ -389,12 +389,12 @@ declare i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8>)
 define signext i8 @vreduce_and_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
@@ -407,12 +407,12 @@ declare i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8>)
 define signext i8 @vreduce_or_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8> %v)
@@ -425,12 +425,12 @@ declare i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8>)
 define signext i8 @vreduce_xor_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8> %v)
@@ -443,12 +443,12 @@ declare i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16>)
 define signext i16 @vreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %v)
@@ -460,13 +460,13 @@ define signext i16 @vwreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
@@ -479,13 +479,13 @@ define signext i16 @vwreduce_uadd_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
@@ -499,12 +499,12 @@ declare i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16>)
 define signext i16 @vreduce_umax_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16> %v)
@@ -517,12 +517,12 @@ declare i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16>)
 define signext i16 @vreduce_smax_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16> %v)
@@ -535,12 +535,12 @@ declare i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16>)
 define signext i16 @vreduce_umin_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16> %v)
@@ -553,12 +553,12 @@ declare i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16>)
 define signext i16 @vreduce_smin_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16> %v)
@@ -571,12 +571,12 @@ declare i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16>)
 define signext i16 @vreduce_and_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16> %v)
@@ -589,12 +589,12 @@ declare i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16>)
 define signext i16 @vreduce_or_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16> %v)
@@ -607,12 +607,12 @@ declare i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16>)
 define signext i16 @vreduce_xor_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16> %v)
@@ -625,12 +625,12 @@ declare i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16>)
 define signext i16 @vreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %v)
@@ -642,13 +642,13 @@ define signext i16 @vwreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
@@ -661,13 +661,13 @@ define signext i16 @vwreduce_uadd_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
@@ -681,12 +681,12 @@ declare i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16>)
 define signext i16 @vreduce_umax_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16> %v)
@@ -699,12 +699,12 @@ declare i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16>)
 define signext i16 @vreduce_smax_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16> %v)
@@ -717,12 +717,12 @@ declare i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16>)
 define signext i16 @vreduce_umin_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16> %v)
@@ -735,12 +735,12 @@ declare i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16>)
 define signext i16 @vreduce_smin_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16> %v)
@@ -753,12 +753,12 @@ declare i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16>)
 define signext i16 @vreduce_and_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16> %v)
@@ -771,12 +771,12 @@ declare i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16>)
 define signext i16 @vreduce_or_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> %v)
@@ -789,12 +789,12 @@ declare i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16>)
 define signext i16 @vreduce_xor_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %v)
@@ -807,12 +807,12 @@ declare i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16>)
 define signext i16 @vreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %v)
@@ -824,13 +824,13 @@ define signext i16 @vwreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
@@ -843,13 +843,13 @@ define signext i16 @vwreduce_uadd_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
@@ -863,12 +863,12 @@ declare i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16>)
 define signext i16 @vreduce_umax_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16> %v)
@@ -881,12 +881,12 @@ declare i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16>)
 define signext i16 @vreduce_smax_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16> %v)
@@ -899,12 +899,12 @@ declare i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16>)
 define signext i16 @vreduce_umin_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16> %v)
@@ -917,12 +917,12 @@ declare i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16>)
 define signext i16 @vreduce_smin_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16> %v)
@@ -935,12 +935,12 @@ declare i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16>)
 define signext i16 @vreduce_and_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16> %v)
@@ -953,12 +953,12 @@ declare i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16>)
 define signext i16 @vreduce_or_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16> %v)
@@ -971,12 +971,12 @@ declare i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16>)
 define signext i16 @vreduce_xor_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16> %v)
@@ -989,12 +989,12 @@ declare i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32>)
 define signext i32 @vreduce_add_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %v)
@@ -1006,13 +1006,13 @@ define signext i32 @vwreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i16> %v to <vscale x 1 x i32>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i16> %v to <vscale x 1 x i32>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = sext <vscale x 1 x i16> %v to <vscale x 1 x i32>
@@ -1025,13 +1025,13 @@ define signext i32 @vwreduce_uadd_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 1 x i16> %v to <vscale x 1 x i32>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 1 x i16> %v to <vscale x 1 x i32>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = zext <vscale x 1 x i16> %v to <vscale x 1 x i32>
@@ -1045,12 +1045,12 @@ declare i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32>)
 define signext i32 @vreduce_umax_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32> %v)
@@ -1063,12 +1063,12 @@ declare i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32>)
 define signext i32 @vreduce_smax_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32> %v)
@@ -1081,12 +1081,12 @@ declare i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32>)
 define signext i32 @vreduce_umin_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32> %v)
@@ -1099,12 +1099,12 @@ declare i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32>)
 define signext i32 @vreduce_smin_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32> %v)
@@ -1117,12 +1117,12 @@ declare i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32>)
 define signext i32 @vreduce_and_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> %v)
@@ -1135,12 +1135,12 @@ declare i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32>)
 define signext i32 @vreduce_or_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> %v)
@@ -1153,12 +1153,12 @@ declare i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32>)
 define signext i32 @vreduce_xor_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> %v)
@@ -1171,12 +1171,12 @@ declare i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32>)
 define signext i32 @vreduce_add_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %v)
@@ -1188,13 +1188,13 @@ define signext i32 @vwreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i16> %v to <vscale x 2 x i32>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i16> %v to <vscale x 2 x i32>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = sext <vscale x 2 x i16> %v to <vscale x 2 x i32>
@@ -1207,13 +1207,13 @@ define signext i32 @vwreduce_uadd_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 2 x i16> %v to <vscale x 2 x i32>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 2 x i16> %v to <vscale x 2 x i32>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = zext <vscale x 2 x i16> %v to <vscale x 2 x i32>
@@ -1227,12 +1227,12 @@ declare i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32>)
 define signext i32 @vreduce_umax_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32> %v)
@@ -1245,12 +1245,12 @@ declare i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32>)
 define signext i32 @vreduce_smax_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32> %v)
@@ -1263,12 +1263,12 @@ declare i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32>)
 define signext i32 @vreduce_umin_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %v)
@@ -1281,12 +1281,12 @@ declare i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32>)
 define signext i32 @vreduce_smin_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32> %v)
@@ -1299,12 +1299,12 @@ declare i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32>)
 define signext i32 @vreduce_and_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32> %v)
@@ -1317,12 +1317,12 @@ declare i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32>)
 define signext i32 @vreduce_or_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %v)
@@ -1335,12 +1335,12 @@ declare i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32>)
 define signext i32 @vreduce_xor_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32> %v)
@@ -1353,12 +1353,12 @@ declare i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
@@ -1370,13 +1370,13 @@ define signext i32 @vwreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = sext <vscale x 4 x i16> %v to <vscale x 4 x i32>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i16> %v to <vscale x 4 x i32>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = sext <vscale x 4 x i16> %v to <vscale x 4 x i32>
@@ -1389,13 +1389,13 @@ define signext i32 @vwreduce_uadd_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = zext <vscale x 4 x i16> %v to <vscale x 4 x i32>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 4 x i16> %v to <vscale x 4 x i32>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = zext <vscale x 4 x i16> %v to <vscale x 4 x i32>
@@ -1409,12 +1409,12 @@ declare i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_umax_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
@@ -1427,12 +1427,12 @@ declare i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_smax_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
@@ -1445,12 +1445,12 @@ declare i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_umin_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
@@ -1463,12 +1463,12 @@ declare i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_smin_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
@@ -1481,12 +1481,12 @@ declare i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_and_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
@@ -1499,12 +1499,12 @@ declare i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_or_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
@@ -1517,12 +1517,12 @@ declare i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_xor_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
@@ -1535,12 +1535,12 @@ declare i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64>)
 define i64 @vreduce_add_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv1i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv1i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %v)
@@ -1552,13 +1552,13 @@ define i64 @vwreduce_add_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i32> %v to <vscale x 1 x i64>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i32> %v to <vscale x 1 x i64>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = sext <vscale x 1 x i32> %v to <vscale x 1 x i64>
@@ -1571,13 +1571,13 @@ define i64 @vwreduce_uadd_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 1 x i32> %v to <vscale x 1 x i64>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 1 x i32> %v to <vscale x 1 x i64>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = zext <vscale x 1 x i32> %v to <vscale x 1 x i64>
@@ -1591,12 +1591,12 @@ declare i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64>)
 define i64 @vreduce_umax_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv1i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv1i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> %v)
@@ -1609,12 +1609,12 @@ declare i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64>)
 define i64 @vreduce_smax_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv1i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv1i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> %v)
@@ -1627,12 +1627,12 @@ declare i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64>)
 define i64 @vreduce_umin_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv1i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv1i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> %v)
@@ -1645,12 +1645,12 @@ declare i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64>)
 define i64 @vreduce_smin_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv1i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv1i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> %v)
@@ -1663,12 +1663,12 @@ declare i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64>)
 define i64 @vreduce_and_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv1i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv1i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64> %v)
@@ -1681,12 +1681,12 @@ declare i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64>)
 define i64 @vreduce_or_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv1i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv1i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64> %v)
@@ -1699,12 +1699,12 @@ declare i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64>)
 define i64 @vreduce_xor_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv1i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv1i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64> %v)
@@ -1717,12 +1717,12 @@ declare i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64>)
 define i64 @vreduce_add_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv2i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv2i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %v)
@@ -1734,13 +1734,13 @@ define i64 @vwreduce_add_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
@@ -1753,13 +1753,13 @@ define i64 @vwreduce_uadd_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
@@ -1773,12 +1773,12 @@ declare i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64>)
 define i64 @vreduce_umax_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv2i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv2i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %v)
@@ -1791,12 +1791,12 @@ declare i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64>)
 define i64 @vreduce_smax_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv2i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv2i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %v)
@@ -1809,12 +1809,12 @@ declare i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64>)
 define i64 @vreduce_umin_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv2i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv2i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %v)
@@ -1827,12 +1827,12 @@ declare i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64>)
 define i64 @vreduce_smin_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv2i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv2i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %v)
@@ -1845,12 +1845,12 @@ declare i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64>)
 define i64 @vreduce_and_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv2i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv2i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %v)
@@ -1863,12 +1863,12 @@ declare i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64>)
 define i64 @vreduce_or_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv2i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv2i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %v)
@@ -1881,12 +1881,12 @@ declare i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64>)
 define i64 @vreduce_xor_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv2i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv2i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %v)
@@ -1899,12 +1899,12 @@ declare i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64>)
 define i64 @vreduce_add_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv4i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv4i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
@@ -1916,13 +1916,13 @@ define i64 @vwreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %e = sext <vscale x 4 x i32> %v to <vscale x 4 x i64>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i32> %v to <vscale x 4 x i64>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = sext <vscale x 4 x i32> %v to <vscale x 4 x i64>
@@ -1935,13 +1935,13 @@ define i64 @vwreduce_uadd_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %e = zext <vscale x 4 x i32> %v to <vscale x 4 x i64>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 4 x i32> %v to <vscale x 4 x i64>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = zext <vscale x 4 x i32> %v to <vscale x 4 x i64>
@@ -1955,12 +1955,12 @@ declare i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64>)
 define i64 @vreduce_umax_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv4i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv4i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v)
@@ -1973,12 +1973,12 @@ declare i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64>)
 define i64 @vreduce_smax_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv4i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv4i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v)
@@ -1991,12 +1991,12 @@ declare i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64>)
 define i64 @vreduce_umin_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv4i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv4i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v)
@@ -2009,12 +2009,12 @@ declare i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64>)
 define i64 @vreduce_smin_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv4i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv4i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v)
@@ -2027,12 +2027,12 @@ declare i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64>)
 define i64 @vreduce_and_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv4i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv4i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
@@ -2045,12 +2045,12 @@ declare i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64>)
 define i64 @vreduce_or_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv4i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv4i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
@@ -2063,12 +2063,12 @@ declare i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64>)
 define i64 @vreduce_xor_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv4i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv4i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll
index 228fa602be0bdb..1ea5bcdf8ef9d9 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll
@@ -16,14 +16,14 @@ define i32 @reduce_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.xor.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.xor.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.xor.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.xor.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.xor.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.xor.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.xor.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.xor.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.xor.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.xor.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.xor.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.xor.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.xor.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.xor.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.xor.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.xor.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i1'
@@ -35,14 +35,14 @@ define i32 @reduce_i1(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.xor.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.xor.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.xor.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.xor.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.xor.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.xor.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.xor.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.xor.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.xor.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.xor.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.xor.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.xor.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.xor.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.xor.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.xor.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.xor.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
@@ -75,14 +75,14 @@ define i32 @reduce_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.xor.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.xor.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.xor.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.xor.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.xor.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.xor.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.xor.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.xor.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i8 @llvm.vp.reduce.xor.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.xor.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i8 @llvm.vp.reduce.xor.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i8 @llvm.vp.reduce.xor.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.xor.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.xor.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.xor.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128_vp = call i8 @llvm.vp.reduce.xor.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i8'
@@ -94,14 +94,14 @@ define i32 @reduce_i8(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.xor.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.xor.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.xor.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.xor.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.xor.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.xor.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.xor.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.xor.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i8 @llvm.vp.reduce.xor.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.xor.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i8 @llvm.vp.reduce.xor.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i8 @llvm.vp.reduce.xor.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.xor.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.xor.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 @llvm.vp.reduce.xor.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i8 @llvm.vp.reduce.xor.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.xor.v1i8(<1 x i8> undef)
@@ -134,14 +134,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.xor.v128i16(<128 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.xor.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.xor.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.xor.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.xor.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.xor.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.xor.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.xor.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.xor.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.xor.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.xor.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.xor.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i16 @llvm.vp.reduce.xor.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i16 @llvm.vp.reduce.xor.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i16 @llvm.vp.reduce.xor.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i16 @llvm.vp.reduce.xor.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call i16 @llvm.vp.reduce.xor.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i16'
@@ -153,14 +153,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.xor.v128i16(<128 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.xor.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.xor.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.xor.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.xor.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.xor.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.xor.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.xor.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.xor.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.xor.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.xor.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.xor.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i16 @llvm.vp.reduce.xor.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i16 @llvm.vp.reduce.xor.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i16 @llvm.vp.reduce.xor.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i16 @llvm.vp.reduce.xor.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call i16 @llvm.vp.reduce.xor.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.xor.v1i16(<1 x i16> undef)
@@ -193,14 +193,14 @@ define i32 @reduce_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.xor.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.xor.v128i32(<128 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.xor.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.xor.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.xor.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.xor.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.xor.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.xor.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.xor.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.xor.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i32 @llvm.vp.reduce.xor.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.xor.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i32 @llvm.vp.reduce.xor.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i32 @llvm.vp.reduce.xor.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i32 @llvm.vp.reduce.xor.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i32 @llvm.vp.reduce.xor.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call i32 @llvm.vp.reduce.xor.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call i32 @llvm.vp.reduce.xor.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i32'
@@ -212,14 +212,14 @@ define i32 @reduce_i32(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.xor.v64i32(<64 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.xor.v128i32(<128 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.xor.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.xor.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.xor.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.xor.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.xor.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.xor.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.xor.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.xor.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i32 @llvm.vp.reduce.xor.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.xor.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i32 @llvm.vp.reduce.xor.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i32 @llvm.vp.reduce.xor.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i32 @llvm.vp.reduce.xor.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i32 @llvm.vp.reduce.xor.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call i32 @llvm.vp.reduce.xor.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call i32 @llvm.vp.reduce.xor.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.xor.v1i32(<1 x i32> undef)
@@ -243,6 +243,44 @@ define i32 @reduce_i32(i32 %arg) {
 }
 
 define i32 @reduce_i64(i32 %arg) {
+; CHECK-LABEL: 'reduce_i64'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.xor.v32i64(<32 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.xor.v64i64(<64 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.xor.v128i64(<128 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i64 @llvm.vp.reduce.xor.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.xor.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i64 @llvm.vp.reduce.xor.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i64 @llvm.vp.reduce.xor.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i64 @llvm.vp.reduce.xor.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32_vp = call i64 @llvm.vp.reduce.xor.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call i64 @llvm.vp.reduce.xor.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call i64 @llvm.vp.reduce.xor.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'reduce_i64'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.xor.v32i64(<32 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.xor.v64i64(<64 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.xor.v128i64(<128 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i64 @llvm.vp.reduce.xor.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.xor.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i64 @llvm.vp.reduce.xor.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i64 @llvm.vp.reduce.xor.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i64 @llvm.vp.reduce.xor.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32_vp = call i64 @llvm.vp.reduce.xor.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call i64 @llvm.vp.reduce.xor.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call i64 @llvm.vp.reduce.xor.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
   %V1   = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
   %V2   = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
   %V4   = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll
index bb98508f239c1b..869e51966e092a 100644
--- a/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll
@@ -1218,37 +1218,37 @@ define void @reduce_add() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; TYPEBASED-LABEL: 'reduce_add'
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %3 = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %5 = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %5 = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %6 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %7 = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %7 = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %9 = call i64 @llvm.vp.reduce.add.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %9 = call i64 @llvm.vp.reduce.add.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %10 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %11 = call i64 @llvm.vp.reduce.add.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %11 = call i64 @llvm.vp.reduce.add.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %12 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %13 = call i64 @llvm.vp.reduce.add.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %13 = call i64 @llvm.vp.reduce.add.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %14 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %15 = call i64 @llvm.vp.reduce.add.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %15 = call i64 @llvm.vp.reduce.add.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %17 = call i8 @llvm.vp.reduce.add.nxv8i8(i8 undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %17 = call i8 @llvm.vp.reduce.add.nxv8i8(i8 undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %19 = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %19 = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %20 = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %21 = call i8 @llvm.vp.reduce.add.nxv8i8(i8 undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %21 = call i8 @llvm.vp.reduce.add.nxv8i8(i8 undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %22 = call i8 @llvm.vector.reduce.add.nxv8i8(<vscale x 8 x i8> undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %23 = call i8 @llvm.vp.reduce.add.nxv16i8(i8 undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %23 = call i8 @llvm.vp.reduce.add.nxv16i8(i8 undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %24 = call i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8> undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %25 = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %25 = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %26 = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %27 = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %27 = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %28 = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %29 = call i64 @llvm.vp.reduce.add.nxv8i64(i64 undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %29 = call i64 @llvm.vp.reduce.add.nxv8i64(i64 undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %30 = call i64 @llvm.vector.reduce.add.nxv8i64(<vscale x 8 x i64> undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %31 = call i64 @llvm.vp.reduce.add.nxv16i64(i64 undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %31 = call i64 @llvm.vp.reduce.add.nxv16i64(i64 undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %32 = call i64 @llvm.vector.reduce.add.nxv16i64(<vscale x 16 x i64> undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
@@ -1324,37 +1324,37 @@ define void @reduce_fadd() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; TYPEBASED-LABEL: 'reduce_fadd'
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = call float @llvm.vp.reduce.fadd.v2f32(float undef, <2 x float> undef, <2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = call float @llvm.vp.reduce.fadd.v2f32(float undef, <2 x float> undef, <2 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = call float @llvm.vector.reduce.fadd.v2f32(float undef, <2 x float> undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %3 = call float @llvm.vp.reduce.fadd.v4f32(float undef, <4 x float> undef, <4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %3 = call float @llvm.vp.reduce.fadd.v4f32(float undef, <4 x float> undef, <4 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %4 = call float @llvm.vector.reduce.fadd.v4f32(float undef, <4 x float> undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %5 = call float @llvm.vp.reduce.fadd.v8f32(float undef, <8 x float> undef, <8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %5 = call float @llvm.vp.reduce.fadd.v8f32(float undef, <8 x float> undef, <8 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %6 = call float @llvm.vector.reduce.fadd.v8f32(float undef, <8 x float> undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %7 = call float @llvm.vp.reduce.fadd.v16f32(float undef, <16 x float> undef, <16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %7 = call float @llvm.vp.reduce.fadd.v16f32(float undef, <16 x float> undef, <16 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %8 = call float @llvm.vector.reduce.fadd.v16f32(float undef, <16 x float> undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %9 = call double @llvm.vp.reduce.fadd.v2f64(double undef, <2 x double> undef, <2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = call double @llvm.vp.reduce.fadd.v2f64(double undef, <2 x double> undef, <2 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %10 = call double @llvm.vector.reduce.fadd.v2f64(double undef, <2 x double> undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %11 = call double @llvm.vp.reduce.fadd.v4f64(double undef, <4 x double> undef, <4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %11 = call double @llvm.vp.reduce.fadd.v4f64(double undef, <4 x double> undef, <4 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %12 = call double @llvm.vector.reduce.fadd.v4f64(double undef, <4 x double> undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %13 = call double @llvm.vp.reduce.fadd.v8f64(double undef, <8 x double> undef, <8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %13 = call double @llvm.vp.reduce.fadd.v8f64(double undef, <8 x double> undef, <8 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %14 = call double @llvm.vector.reduce.fadd.v8f64(double undef, <8 x double> undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %15 = call double @llvm.vp.reduce.fadd.v16f64(double undef, <16 x double> undef, <16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %15 = call double @llvm.vp.reduce.fadd.v16f64(double undef, <16 x double> undef, <16 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %16 = call double @llvm.vector.reduce.fadd.v16f64(double undef, <16 x double> undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %17 = call float @llvm.vp.reduce.fadd.nxv2f32(float undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %17 = call float @llvm.vp.reduce.fadd.nxv2f32(float undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %18 = call float @llvm.vector.reduce.fadd.nxv2f32(float undef, <vscale x 2 x float> undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %19 = call float @llvm.vp.reduce.fadd.nxv4f32(float undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %19 = call float @llvm.vp.reduce.fadd.nxv4f32(float undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %20 = call float @llvm.vector.reduce.fadd.nxv4f32(float undef, <vscale x 4 x float> undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %21 = call float @llvm.vp.reduce.fadd.nxv8f32(float undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %21 = call float @llvm.vp.reduce.fadd.nxv8f32(float undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %22 = call float @llvm.vector.reduce.fadd.nxv8f32(float undef, <vscale x 8 x float> undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %23 = call float @llvm.vp.reduce.fadd.nxv16f32(float undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %23 = call float @llvm.vp.reduce.fadd.nxv16f32(float undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %24 = call float @llvm.vector.reduce.fadd.nxv16f32(float undef, <vscale x 16 x float> undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %25 = call double @llvm.vp.reduce.fadd.nxv2f64(double undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %25 = call double @llvm.vp.reduce.fadd.nxv2f64(double undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %26 = call double @llvm.vector.reduce.fadd.nxv2f64(double undef, <vscale x 2 x double> undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %27 = call double @llvm.vp.reduce.fadd.nxv4f64(double undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %27 = call double @llvm.vp.reduce.fadd.nxv4f64(double undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %28 = call double @llvm.vector.reduce.fadd.nxv4f64(double undef, <vscale x 4 x double> undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %29 = call double @llvm.vp.reduce.fadd.nxv8f64(double undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %29 = call double @llvm.vp.reduce.fadd.nxv8f64(double undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %30 = call double @llvm.vector.reduce.fadd.nxv8f64(double undef, <vscale x 8 x double> undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %31 = call double @llvm.vp.reduce.fadd.nxv16f64(double undef, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %31 = call double @llvm.vp.reduce.fadd.nxv16f64(double undef, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %32 = call double @llvm.vector.reduce.fadd.nxv16f64(double undef, <vscale x 16 x double> undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;

>From 44145121e8f34c9fe54868c59c86d48c85bf9b43 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Tue, 29 Oct 2024 21:58:43 -0700
Subject: [PATCH 4/4] Fix missing tests

---
 .../Target/RISCV/RISCVTargetTransformInfo.cpp |   4 +
 .../Analysis/CostModel/RISCV/reduce-and.ll    |   3 +
 .../CostModel/RISCV/reduce-fmaximum.ll        | 112 +++++++-------
 .../CostModel/RISCV/reduce-fminimum.ll        |  56 +++----
 .../Analysis/CostModel/RISCV/reduce-max.ll    |   3 +
 .../CostModel/RISCV/reduce-scalable-fp.ll     | 144 +++++++++---------
 6 files changed, 166 insertions(+), 156 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index f843f5588a17f2..627eb9cc4bc5fe 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1237,10 +1237,14 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
       IID = Intrinsic::umin;
       break;
     case Intrinsic::vp_reduce_fmax:
+      IID = Intrinsic::maxnum;
+      break;
     case Intrinsic::vp_reduce_fmaximum:
       IID = Intrinsic::maximum;
       break;
     case Intrinsic::vp_reduce_fmin:
+      IID = Intrinsic::minnum;
+      break;
     case Intrinsic::vp_reduce_fminimum:
       IID = Intrinsic::minimum;
       break;
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
index 76f2bd949c652c..47ae0bfa58cf9a 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
@@ -100,6 +100,7 @@ define i32 @reduce_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.and.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.and.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.and.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128_vp = call i8 @llvm.vp.reduce.and.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i8'
@@ -118,6 +119,7 @@ define i32 @reduce_i8(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.and.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.and.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 @llvm.vp.reduce.and.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i8 @llvm.vp.reduce.and.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> undef)
@@ -136,6 +138,7 @@ define i32 @reduce_i8(i32 %arg) {
   %V16_vp  = call i8 @llvm.vp.reduce.and.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
   %V32_vp  = call i8 @llvm.vp.reduce.and.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
   %V64_vp  = call i8 @llvm.vp.reduce.and.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+  %V128_vp  = call i8 @llvm.vp.reduce.and.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
index 6e33ccaaed5c7e..c28fdfbfddac1a 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
@@ -36,20 +36,20 @@ define float @reduce_fmaximum_f32(float %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %12 = call fast float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %13 = call fast float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %14 = call fast float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %15 = call float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %17 = call float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %18 = call float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %19 = call float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %20 = call float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %21 = call float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %22 = call fast float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %23 = call fast float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %24 = call fast float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %25 = call fast float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %26 = call fast float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %27 = call fast float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %28 = call fast float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %15 = call float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %16 = call float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %17 = call float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %18 = call float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %19 = call float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %20 = call float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %21 = call float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %22 = call fast float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %23 = call fast float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %24 = call fast float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %25 = call fast float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %26 = call fast float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %27 = call fast float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %28 = call fast float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float undef
 ;
 ; SIZE-LABEL: 'reduce_fmaximum_f32'
@@ -81,20 +81,20 @@ define float @reduce_fmaximum_f32(float %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %12 = call fast float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call fast float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = call fast float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %15 = call float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %17 = call float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %18 = call float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %19 = call float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %20 = call float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %21 = call float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %22 = call fast float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %23 = call fast float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %24 = call fast float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %25 = call fast float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %26 = call fast float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %27 = call fast float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = call fast float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %15 = call float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %16 = call float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %17 = call float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %18 = call float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %19 = call float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %20 = call float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %21 = call float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %22 = call fast float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %23 = call fast float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %24 = call fast float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %25 = call fast float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %26 = call fast float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %27 = call fast float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %28 = call fast float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float undef
 ;
 %V2   = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
@@ -179,20 +179,20 @@ define double @reduce_fmaximum_f64(double %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %18 = call fast double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %19 = call fast double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %20 = call fast double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %21 = call double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %22 = call double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %23 = call double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %24 = call double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %25 = call double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %26 = call double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %27 = call double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = call fast double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %29 = call fast double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %30 = call fast double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %31 = call fast double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %32 = call fast double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %33 = call fast double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %34 = call fast double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %21 = call double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %22 = call double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %23 = call double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %24 = call double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %25 = call double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %26 = call double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %27 = call double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %28 = call fast double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %29 = call fast double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %30 = call fast double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %31 = call fast double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %32 = call fast double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %33 = call fast double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %34 = call fast double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double undef
 ;
 ; SIZE-LABEL: 'reduce_fmaximum_f64'
@@ -222,20 +222,20 @@ define double @reduce_fmaximum_f64(double %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = call fast double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %19 = call fast double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %20 = call fast double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %21 = call double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %22 = call double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %23 = call double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %24 = call double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %25 = call double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %26 = call double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %27 = call double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = call fast double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %29 = call fast double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %30 = call fast double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %31 = call fast double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %32 = call fast double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %33 = call fast double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %34 = call fast double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %21 = call double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %22 = call double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %23 = call double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %24 = call double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %25 = call double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %26 = call double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %27 = call double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %28 = call fast double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %29 = call fast double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %30 = call fast double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %31 = call fast double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %32 = call fast double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %33 = call fast double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %34 = call fast double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double undef
 ;
 %V2   = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
index d8fd6393039282..99c9f5d89f6963 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
@@ -22,13 +22,13 @@ define float @reduce_fmaximum_f32(float %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32_vp = call float @llvm.vp.reduce.fminimum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V64_vp = call float @llvm.vp.reduce.fminimum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V128_vp = call float @llvm.vp.reduce.fminimum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %1 = call float @llvm.vp.reduce.fmin.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call float @llvm.vp.reduce.fmin.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %3 = call float @llvm.vp.reduce.fmin.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %4 = call float @llvm.vp.reduce.fmin.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %5 = call float @llvm.vp.reduce.fmin.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %6 = call float @llvm.vp.reduce.fmin.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %7 = call float @llvm.vp.reduce.fmin.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = call float @llvm.vp.reduce.fmin.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = call float @llvm.vp.reduce.fmin.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %3 = call float @llvm.vp.reduce.fmin.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %4 = call float @llvm.vp.reduce.fmin.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call float @llvm.vp.reduce.fmin.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %6 = call float @llvm.vp.reduce.fmin.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %7 = call float @llvm.vp.reduce.fmin.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float undef
 ;
 ; SIZE-LABEL: 'reduce_fmaximum_f32'
@@ -46,13 +46,13 @@ define float @reduce_fmaximum_f32(float %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call float @llvm.vp.reduce.fminimum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64_vp = call float @llvm.vp.reduce.fminimum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V128_vp = call float @llvm.vp.reduce.fminimum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call float @llvm.vp.reduce.fmin.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call float @llvm.vp.reduce.fmin.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call float @llvm.vp.reduce.fmin.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call float @llvm.vp.reduce.fmin.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call float @llvm.vp.reduce.fmin.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call float @llvm.vp.reduce.fmin.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call float @llvm.vp.reduce.fmin.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = call float @llvm.vp.reduce.fmin.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = call float @llvm.vp.reduce.fmin.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %3 = call float @llvm.vp.reduce.fmin.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %4 = call float @llvm.vp.reduce.fmin.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %5 = call float @llvm.vp.reduce.fmin.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %6 = call float @llvm.vp.reduce.fmin.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %7 = call float @llvm.vp.reduce.fmin.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float undef
 ;
 %V2   = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
@@ -103,13 +103,13 @@ define double @reduce_fmaximum_f64(double %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32_vp = call double @llvm.vp.reduce.fminimum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64_vp = call double @llvm.vp.reduce.fminimum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call double @llvm.vp.reduce.fminimum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %1 = call double @llvm.vp.reduce.fmin.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %2 = call double @llvm.vp.reduce.fmin.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %3 = call double @llvm.vp.reduce.fmin.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %4 = call double @llvm.vp.reduce.fmin.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = call double @llvm.vp.reduce.fmin.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = call double @llvm.vp.reduce.fmin.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %7 = call double @llvm.vp.reduce.fmin.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = call double @llvm.vp.reduce.fmin.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = call double @llvm.vp.reduce.fmin.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %3 = call double @llvm.vp.reduce.fmin.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %4 = call double @llvm.vp.reduce.fmin.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %5 = call double @llvm.vp.reduce.fmin.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %6 = call double @llvm.vp.reduce.fmin.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %7 = call double @llvm.vp.reduce.fmin.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double undef
 ;
 ; SIZE-LABEL: 'reduce_fmaximum_f64'
@@ -126,13 +126,13 @@ define double @reduce_fmaximum_f64(double %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call double @llvm.vp.reduce.fminimum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64_vp = call double @llvm.vp.reduce.fminimum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V128_vp = call double @llvm.vp.reduce.fminimum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call double @llvm.vp.reduce.fmin.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call double @llvm.vp.reduce.fmin.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call double @llvm.vp.reduce.fmin.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call double @llvm.vp.reduce.fmin.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call double @llvm.vp.reduce.fmin.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call double @llvm.vp.reduce.fmin.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call double @llvm.vp.reduce.fmin.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = call double @llvm.vp.reduce.fmin.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = call double @llvm.vp.reduce.fmin.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %3 = call double @llvm.vp.reduce.fmin.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %4 = call double @llvm.vp.reduce.fmin.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = call double @llvm.vp.reduce.fmin.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %6 = call double @llvm.vp.reduce.fmin.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %7 = call double @llvm.vp.reduce.fmin.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double undef
 ;
 %V2   = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
index f77e94cd333aa5..e0f98a759cc89b 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
@@ -133,6 +133,7 @@ define i32 @reduce_umax_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
@@ -151,6 +152,7 @@ define i32 @reduce_umax_i16(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
@@ -168,6 +170,7 @@ define i32 @reduce_umax_i16(i32 %arg) {
   %V16  = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
   %V32  = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
   %V64  = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
+  %V128  = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> undef)
 
   %V1_vp   = call i16 @llvm.vp.reduce.umax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
   %V2_vp   = call i16 @llvm.vp.reduce.umax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
index 134f75d6b4692d..74dbcfae93f858 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
@@ -545,12 +545,12 @@ declare half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half>)
 define half @vreduce_fmin_nxv1f16(<vscale x 1 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
@@ -561,12 +561,12 @@ define half @vreduce_fmin_nxv1f16(<vscale x 1 x half> %v) {
 define half @vreduce_fmin_nxv1f16_nonans(<vscale x 1 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f16_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f16_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
@@ -577,12 +577,12 @@ define half @vreduce_fmin_nxv1f16_nonans(<vscale x 1 x half> %v) {
 define half @vreduce_fmin_nxv1f16_nonans_noinfs(<vscale x 1 x half> %v) #1 {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f16_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f16_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
@@ -595,12 +595,12 @@ declare half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half>)
 define half @vreduce_fmin_nxv2f16(<vscale x 2 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv2f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv2f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %v)
@@ -613,12 +613,12 @@ declare half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half>)
 define half @vreduce_fmin_nxv4f16(<vscale x 4 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv4f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv4f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %v)
@@ -631,12 +631,12 @@ declare half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half>)
 define half @vreduce_fmin_nxv64f16(<vscale x 64 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv64f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv64f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
@@ -649,12 +649,12 @@ declare float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float>)
 define float @vreduce_fmin_nxv1f32(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
@@ -665,12 +665,12 @@ define float @vreduce_fmin_nxv1f32(<vscale x 1 x float> %v) {
 define float @vreduce_fmin_nxv1f32_nonans(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f32_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f32_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
@@ -681,12 +681,12 @@ define float @vreduce_fmin_nxv1f32_nonans(<vscale x 1 x float> %v) {
 define float @vreduce_fmin_nxv1f32_nonans_noinfs(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f32_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f32_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
@@ -699,12 +699,12 @@ declare float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float>)
 define float @vreduce_fmin_nxv2f32(<vscale x 2 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv2f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv2f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %v)
@@ -717,12 +717,12 @@ declare float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float>)
 define float @vreduce_fmin_nxv4f32(<vscale x 4 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv4f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv4f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
@@ -735,12 +735,12 @@ declare float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float>)
 define float @vreduce_fmin_nxv32f32(<vscale x 32 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv32f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv32f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
@@ -753,12 +753,12 @@ declare double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double>)
 define double @vreduce_fmin_nxv1f64(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
@@ -769,12 +769,12 @@ define double @vreduce_fmin_nxv1f64(<vscale x 1 x double> %v) {
 define double @vreduce_fmin_nxv1f64_nonans(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f64_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f64_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
@@ -785,12 +785,12 @@ define double @vreduce_fmin_nxv1f64_nonans(<vscale x 1 x double> %v) {
 define double @vreduce_fmin_nxv1f64_nonans_noinfs(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f64_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f64_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
@@ -803,12 +803,12 @@ declare double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double>)
 define double @vreduce_fmin_nxv2f64(<vscale x 2 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv2f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv2f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %v)
@@ -821,12 +821,12 @@ declare double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double>)
 define double @vreduce_fmin_nxv4f64(<vscale x 4 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv4f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv4f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
@@ -839,12 +839,12 @@ declare double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double>)
 define double @vreduce_fmin_nxv16f64(<vscale x 16 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv16f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv16f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
@@ -857,12 +857,12 @@ declare half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half>)
 define half @vreduce_fmax_nxv1f16(<vscale x 1 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
@@ -873,12 +873,12 @@ define half @vreduce_fmax_nxv1f16(<vscale x 1 x half> %v) {
 define half @vreduce_fmax_nxv1f16_nonans(<vscale x 1 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f16_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f16_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
@@ -889,12 +889,12 @@ define half @vreduce_fmax_nxv1f16_nonans(<vscale x 1 x half> %v) {
 define half @vreduce_fmax_nxv1f16_nonans_noinfs(<vscale x 1 x half> %v) #1 {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f16_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f16_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
@@ -907,12 +907,12 @@ declare half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half>)
 define half @vreduce_fmax_nxv2f16(<vscale x 2 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv2f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv2f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %v)
@@ -925,12 +925,12 @@ declare half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half>)
 define half @vreduce_fmax_nxv4f16(<vscale x 4 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv4f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv4f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %v)
@@ -943,12 +943,12 @@ declare half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half>)
 define half @vreduce_fmax_nxv64f16(<vscale x 64 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv64f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv64f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
@@ -961,12 +961,12 @@ declare float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float>)
 define float @vreduce_fmax_nxv1f32(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
@@ -977,12 +977,12 @@ define float @vreduce_fmax_nxv1f32(<vscale x 1 x float> %v) {
 define float @vreduce_fmax_nxv1f32_nonans(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f32_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f32_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
@@ -993,12 +993,12 @@ define float @vreduce_fmax_nxv1f32_nonans(<vscale x 1 x float> %v) {
 define float @vreduce_fmax_nxv1f32_nonans_noinfs(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f32_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f32_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
@@ -1011,12 +1011,12 @@ declare float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float>)
 define float @vreduce_fmax_nxv2f32(<vscale x 2 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv2f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv2f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %v)
@@ -1029,12 +1029,12 @@ declare float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float>)
 define float @vreduce_fmax_nxv4f32(<vscale x 4 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv4f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv4f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
@@ -1047,12 +1047,12 @@ declare float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float>)
 define float @vreduce_fmax_nxv32f32(<vscale x 32 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv32f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv32f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
@@ -1065,12 +1065,12 @@ declare double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double>)
 define double @vreduce_fmax_nxv1f64(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
@@ -1081,12 +1081,12 @@ define double @vreduce_fmax_nxv1f64(<vscale x 1 x double> %v) {
 define double @vreduce_fmax_nxv1f64_nonans(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f64_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f64_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
@@ -1097,12 +1097,12 @@ define double @vreduce_fmax_nxv1f64_nonans(<vscale x 1 x double> %v) {
 define double @vreduce_fmax_nxv1f64_nonans_noinfs(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f64_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f64_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
@@ -1115,12 +1115,12 @@ declare double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double>)
 define double @vreduce_fmax_nxv2f64(<vscale x 2 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv2f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv2f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %v)
@@ -1133,12 +1133,12 @@ declare double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double>)
 define double @vreduce_fmax_nxv4f64(<vscale x 4 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv4f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv4f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
@@ -1151,12 +1151,12 @@ declare double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double>)
 define double @vreduce_fmax_nxv16f64(<vscale x 16 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv16f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv16f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)