[llvm] [RISCV][TTI] Implement instruction cost for vp.reduce.* (PR #114184)

Wed Nov 6 21:39:02 PST 2024

https://github.com/ElvisWang123 updated https://github.com/llvm/llvm-project/pull/114184

>From f1055d057ebfd2396c84d445dd7305c9562fa7e5 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Tue, 29 Oct 2024 18:49:05 -0700
Subject: [PATCH 1/7] Precommit testcases.

---
 .../Analysis/CostModel/RISCV/reduce-add.ll    | 141 +++++--
 .../Analysis/CostModel/RISCV/reduce-and.ll    | 147 +++++--
 .../Analysis/CostModel/RISCV/reduce-fadd.ll   | 385 +++++++++++++++---
 .../CostModel/RISCV/reduce-fmaximum.ll        | 182 ++++++++-
 .../CostModel/RISCV/reduce-fminimum.ll        |  98 ++++-
 .../Analysis/CostModel/RISCV/reduce-fmul.ll   | 355 +++++++++++++++-
 .../Analysis/CostModel/RISCV/reduce-max.ll    | 275 ++++++++++---
 .../Analysis/CostModel/RISCV/reduce-min.ll    | 272 ++++++++++---
 .../Analysis/CostModel/RISCV/reduce-or.ll     | 150 +++++--
 .../CostModel/RISCV/reduce-scalable-fp.ll     | 211 +++++++++-
 .../CostModel/RISCV/reduce-scalable-int.ll    | 352 +++++++++++++++-
 .../Analysis/CostModel/RISCV/reduce-xor.ll    | 141 +++++--
 12 files changed, 2421 insertions(+), 288 deletions(-)

diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
index 6032ae01aa718b..1edcdecb923e57 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
@@ -1,8 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
 
 define i32 @reduce_i1(i32 %arg) {
 ; CHECK-LABEL: 'reduce_i1'
@@ -14,6 +16,14 @@ define i32 @reduce_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.add.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.add.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.add.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.add.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.add.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.add.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.add.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.add.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i1'
@@ -25,6 +35,14 @@ define i32 @reduce_i1(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.add.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.add.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.add.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.add.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.add.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.add.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.add.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.add.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> undef)
@@ -35,6 +53,15 @@ define i32 @reduce_i1(i32 %arg) {
   %V32  = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
   %V64  = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
   %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
+
+  %V1_vp   = call i1 @llvm.vp.reduce.add.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i1 @llvm.vp.reduce.add.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i1 @llvm.vp.reduce.add.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i1 @llvm.vp.reduce.add.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i1 @llvm.vp.reduce.add.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i1 @llvm.vp.reduce.add.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i1 @llvm.vp.reduce.add.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i1 @llvm.vp.reduce.add.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -48,6 +75,14 @@ define i32 @reduce_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.add.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.add.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.add.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.add.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i8'
@@ -59,6 +94,14 @@ define i32 @reduce_i8(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.add.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.add.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.add.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.add.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
@@ -69,6 +112,15 @@ define i32 @reduce_i8(i32 %arg) {
   %V32  = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
   %V64  = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
   %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
+
+  %V1_vp   = call i8 @llvm.vp.reduce.add.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i8 @llvm.vp.reduce.add.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i8 @llvm.vp.reduce.add.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i8 @llvm.vp.reduce.add.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -82,6 +134,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.add.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.add.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.add.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.add.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.add.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.add.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.add.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.add.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i16'
@@ -93,6 +153,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.add.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.add.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.add.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.add.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.add.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.add.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.add.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.add.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
@@ -103,6 +171,15 @@ define i32 @reduce_i16(i32 %arg) {
   %V32  = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
   %V64  = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
   %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
+
+  %V1_vp   = call i16 @llvm.vp.reduce.add.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i16 @llvm.vp.reduce.add.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i16 @llvm.vp.reduce.add.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i16 @llvm.vp.reduce.add.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i16 @llvm.vp.reduce.add.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i16 @llvm.vp.reduce.add.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i16 @llvm.vp.reduce.add.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i16 @llvm.vp.reduce.add.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -116,6 +193,14 @@ define i32 @reduce_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.add.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.add.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.add.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.add.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.add.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.add.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.add.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.add.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i32'
@@ -127,6 +212,14 @@ define i32 @reduce_i32(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.add.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.add.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.add.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.add.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.add.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.add.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.add.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.add.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef)
@@ -137,32 +230,19 @@ define i32 @reduce_i32(i32 %arg) {
   %V32  = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
   %V64  = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
   %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
+
+  %V1_vp   = call i32 @llvm.vp.reduce.add.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i32 @llvm.vp.reduce.add.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i32 @llvm.vp.reduce.add.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i32 @llvm.vp.reduce.add.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i32 @llvm.vp.reduce.add.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i32 @llvm.vp.reduce.add.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i32 @llvm.vp.reduce.add.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i32 @llvm.vp.reduce.add.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
 define i32 @reduce_i64(i32 %arg) {
-; CHECK-LABEL: 'reduce_i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SIZE-LABEL: 'reduce_i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
   %V1   = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
   %V2   = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
   %V4   = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
@@ -171,6 +251,15 @@ define i32 @reduce_i64(i32 %arg) {
   %V32  = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
   %V64  = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
   %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef)
+
+  %V1_vp   = call i64 @llvm.vp.reduce.add.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i64 @llvm.vp.reduce.add.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i64 @llvm.vp.reduce.add.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i64 @llvm.vp.reduce.add.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i64 @llvm.vp.reduce.add.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i64 @llvm.vp.reduce.add.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i64 @llvm.vp.reduce.add.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i64 @llvm.vp.reduce.add.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
index a8eb4e9a280dd0..f72298b8060631 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
@@ -1,8 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
 
 define i32 @reduce_i1(i32 %arg) {
 ; CHECK-LABEL: 'reduce_i1'
@@ -17,6 +19,17 @@ define i32 @reduce_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.and.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.and.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.and.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.and.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.and.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.and.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.and.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.and.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14076 for instruction: %V256_vp = call i1 @llvm.vp.reduce.and.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 46584 for instruction: %V512_vp = call i1 @llvm.vp.reduce.and.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 166896 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.and.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i1'
@@ -31,6 +44,17 @@ define i32 @reduce_i1(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.and.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.and.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.and.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.and.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.and.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.and.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.and.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.and.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3324 for instruction: %V256_vp = call i1 @llvm.vp.reduce.and.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10744 for instruction: %V512_vp = call i1 @llvm.vp.reduce.and.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 37872 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.and.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
@@ -44,6 +68,18 @@ define i32 @reduce_i1(i32 %arg) {
   %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
   %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
   %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
+
+  %V1_vp   = call i1 @llvm.vp.reduce.and.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i1 @llvm.vp.reduce.and.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i1 @llvm.vp.reduce.and.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i1 @llvm.vp.reduce.and.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i1 @llvm.vp.reduce.and.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i1 @llvm.vp.reduce.and.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i1 @llvm.vp.reduce.and.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i1 @llvm.vp.reduce.and.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+  %V256_vp = call i1 @llvm.vp.reduce.and.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
+  %V512_vp = call i1 @llvm.vp.reduce.and.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
+  %V1024_vp = call i1 @llvm.vp.reduce.and.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -57,6 +93,13 @@ define i32 @reduce_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.and.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.and.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.and.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.and.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.and.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.and.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.and.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i8'
@@ -68,6 +111,13 @@ define i32 @reduce_i8(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.and.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.and.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.and.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.and.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.and.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.and.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.and.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> undef)
@@ -78,6 +128,14 @@ define i32 @reduce_i8(i32 %arg) {
   %V32  = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
   %V64  = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef)
   %V128 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> undef)
+
+  %V1_vp   = call i8 @llvm.vp.reduce.and.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i8 @llvm.vp.reduce.and.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i8 @llvm.vp.reduce.and.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i8 @llvm.vp.reduce.and.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i8 @llvm.vp.reduce.and.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i8 @llvm.vp.reduce.and.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i8 @llvm.vp.reduce.and.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -91,6 +149,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.and.v128i16(<128 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.and.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.and.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.and.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.and.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.and.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.and.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.and.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.and.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i16'
@@ -102,6 +168,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.and.v128i16(<128 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.and.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.and.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.and.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.and.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.and.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.and.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.and.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.and.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.and.v1i16(<1 x i16> undef)
@@ -112,6 +186,15 @@ define i32 @reduce_i16(i32 %arg) {
   %V32  = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> undef)
   %V64  = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> undef)
   %V128 = call i16 @llvm.vector.reduce.and.v128i16(<128 x i16> undef)
+
+  %V1_vp   = call i16 @llvm.vp.reduce.and.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i16 @llvm.vp.reduce.and.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i16 @llvm.vp.reduce.and.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i16 @llvm.vp.reduce.and.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i16 @llvm.vp.reduce.and.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i16 @llvm.vp.reduce.and.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i16 @llvm.vp.reduce.and.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i16 @llvm.vp.reduce.and.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -125,6 +208,14 @@ define i32 @reduce_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.and.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.and.v128i32(<128 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.and.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.and.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.and.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.and.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.and.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.and.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.and.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.and.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i32'
@@ -136,6 +227,14 @@ define i32 @reduce_i32(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.and.v64i32(<64 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.and.v128i32(<128 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.and.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.and.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.and.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.and.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.and.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.and.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.and.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.and.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.and.v1i32(<1 x i32> undef)
@@ -146,32 +245,19 @@ define i32 @reduce_i32(i32 %arg) {
   %V32  = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> undef)
   %V64  = call i32 @llvm.vector.reduce.and.v64i32(<64 x i32> undef)
   %V128 = call i32 @llvm.vector.reduce.and.v128i32(<128 x i32> undef)
+
+  %V1_vp   = call i32 @llvm.vp.reduce.and.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i32 @llvm.vp.reduce.and.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i32 @llvm.vp.reduce.and.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i32 @llvm.vp.reduce.and.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i32 @llvm.vp.reduce.and.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i32 @llvm.vp.reduce.and.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i32 @llvm.vp.reduce.and.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i32 @llvm.vp.reduce.and.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
 define i32 @reduce_i64(i32 %arg) {
-; CHECK-LABEL: 'reduce_i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.and.v32i64(<32 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.and.v64i64(<64 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.and.v128i64(<128 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SIZE-LABEL: 'reduce_i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.and.v32i64(<32 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.and.v64i64(<64 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.and.v128i64(<128 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
   %V1   = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
   %V2   = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
   %V4   = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
@@ -180,6 +266,15 @@ define i32 @reduce_i64(i32 %arg) {
   %V32  = call i64 @llvm.vector.reduce.and.v32i64(<32 x i64> undef)
   %V64  = call i64 @llvm.vector.reduce.and.v64i64(<64 x i64> undef)
   %V128 = call i64 @llvm.vector.reduce.and.v128i64(<128 x i64> undef)
+
+  %V1_vp   = call i64 @llvm.vp.reduce.and.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i64 @llvm.vp.reduce.and.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i64 @llvm.vp.reduce.and.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i64 @llvm.vp.reduce.and.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i64 @llvm.vp.reduce.and.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i64 @llvm.vp.reduce.and.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i64 @llvm.vp.reduce.and.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i64 @llvm.vp.reduce.and.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
index 1762f701a9b2d5..f1636785b90b81 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFH
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFHMIN
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s  --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFH
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFHMIN
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s  --check-prefix=SIZE
 
 define void @reduce_fadd_bfloat() {
 ; FP-REDUCE-LABEL: 'reduce_fadd_bfloat'
@@ -19,6 +19,20 @@ define void @reduce_fadd_bfloat() {
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef, <vscale x 32 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fadd_bfloat'
@@ -36,6 +50,20 @@ define void @reduce_fadd_bfloat() {
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef, <vscale x 32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0.0, <1 x bfloat> undef)
@@ -52,6 +80,21 @@ define void @reduce_fadd_bfloat() {
   %NXV8 = call fast bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0.0, <vscale x 8 x bfloat> undef)
   %NXV16 = call fast bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0.0, <vscale x 16 x bfloat> undef)
   %NXV32 = call fast bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0.0, <vscale x 32 x bfloat> undef)
+
+  %V1_vp = call fast bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0.0, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+  %V2_vp = call fast bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0.0, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+  %V4_vp = call fast bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0.0, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+  %V8_vp = call fast bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0.0, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+  %V16_vp = call fast bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0.0, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+  %v32_vp = call fast bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0.0, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+  %V64_vp = call fast bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0.0, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call fast bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0.0, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+  %NXV1_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv1bf16(bfloat 0.0, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+  %NXV2_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv2bf16(bfloat 0.0, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+  %NXV4_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv4bf16(bfloat 0.0, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+  %NXV8_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv8bf16(bfloat 0.0, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+  %NXV16_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv16bf16(bfloat 0.0, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+  %NXV32_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv32bf16(bfloat 0.0, <vscale x 32 x bfloat> undef, <vscale x 32 x i1> undef, i32 undef)
   ret void
 }
 
@@ -71,6 +114,20 @@ define void @reduce_fadd_half() {
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NXV8 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NXV16 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV32 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast half @llvm.vp.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast half @llvm.vp.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast half @llvm.vp.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call fast half @llvm.vp.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call fast half @llvm.vp.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call fast half @llvm.vp.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; FP-REDUCE-ZVFHMIN-LABEL: 'reduce_fadd_half'
@@ -88,6 +145,20 @@ define void @reduce_fadd_half() {
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast half @llvm.vp.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast half @llvm.vp.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast half @llvm.vp.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call fast half @llvm.vp.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call fast half @llvm.vp.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call fast half @llvm.vp.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fadd_half'
@@ -105,6 +176,20 @@ define void @reduce_fadd_half() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV32 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast half @llvm.vp.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast half @llvm.vp.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast half @llvm.vp.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call fast half @llvm.vp.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call fast half @llvm.vp.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call fast half @llvm.vp.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef)
@@ -121,6 +206,21 @@ define void @reduce_fadd_half() {
   %NXV8 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0.0, <vscale x 8 x half> undef)
   %NXV16 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0.0, <vscale x 16 x half> undef)
   %NXV32 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0.0, <vscale x 32 x half> undef)
+
+  %V1_vp = call fast half @llvm.vp.reduce.fadd.v1f16(half 0.0, <1 x half> undef, <1 x i1> undef, i32 undef)
+  %V2_vp = call fast half @llvm.vp.reduce.fadd.v2f16(half 0.0, <2 x half> undef, <2 x i1> undef, i32 undef)
+  %V4_vp = call fast half @llvm.vp.reduce.fadd.v4f16(half 0.0, <4 x half> undef, <4 x i1> undef, i32 undef)
+  %V8_vp = call fast half @llvm.vp.reduce.fadd.v8f16(half 0.0, <8 x half> undef, <8 x i1> undef, i32 undef)
+  %V16_vp = call fast half @llvm.vp.reduce.fadd.v16f16(half 0.0, <16 x half> undef, <16 x i1> undef, i32 undef)
+  %v32_vp = call fast half @llvm.vp.reduce.fadd.v32f16(half 0.0, <32 x half> undef, <32 x i1> undef, i32 undef)
+  %V64_vp = call fast half @llvm.vp.reduce.fadd.v64f16(half 0.0, <64 x half> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call fast half @llvm.vp.reduce.fadd.v128f16(half 0.0, <128 x half> undef, <128 x i1> undef, i32 undef)
+  %NXV1_vp = call fast half @llvm.vp.reduce.fadd.nxv1f16(half 0.0, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+  %NXV2_vp = call fast half @llvm.vp.reduce.fadd.nxv2f16(half 0.0, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+  %NXV4_vp = call fast half @llvm.vp.reduce.fadd.nxv4f16(half 0.0, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+  %NXV8_vp = call fast half @llvm.vp.reduce.fadd.nxv8f16(half 0.0, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+  %NXV16_vp = call fast half @llvm.vp.reduce.fadd.nxv16f16(half 0.0, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+  %NXV32_vp = call fast half @llvm.vp.reduce.fadd.nxv32f16(half 0.0, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
   ret void
 }
 
@@ -139,6 +239,20 @@ define void @reduce_fadd_float() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV4 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NXV8 = call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NXV16 = call fast float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast float @llvm.vp.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast float @llvm.vp.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast float @llvm.vp.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call fast float @llvm.vp.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call fast float @llvm.vp.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call fast float @llvm.vp.reduce.fadd.nxv32f32(float 0.000000e+00, <vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fadd_float'
@@ -155,6 +269,20 @@ define void @reduce_fadd_float() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call fast float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast float @llvm.vp.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast float @llvm.vp.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast float @llvm.vp.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call fast float @llvm.vp.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call fast float @llvm.vp.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call fast float @llvm.vp.reduce.fadd.nxv32f32(float 0.000000e+00, <vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef)
@@ -170,6 +298,21 @@ define void @reduce_fadd_float() {
   %NXV4 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.0, <vscale x 4 x float> undef)
   %NXV8 = call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.0, <vscale x 8 x float> undef)
   %NXV16 = call fast float @llvm.vector.reduce.fadd.nxv16f32(float 0.0, <vscale x 16 x float> undef)
+
+  %V1_vp = call fast float @llvm.vp.reduce.fadd.v1f32(float 0.0, <1 x float> undef, <1 x i1> undef, i32 undef)
+  %V2_vp = call fast float @llvm.vp.reduce.fadd.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef)
+  %V4_vp = call fast float @llvm.vp.reduce.fadd.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef)
+  %V8_vp = call fast float @llvm.vp.reduce.fadd.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef)
+  %V16_vp = call fast float @llvm.vp.reduce.fadd.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef)
+  %v32_vp = call fast float @llvm.vp.reduce.fadd.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef)
+  %V64_vp = call fast float @llvm.vp.reduce.fadd.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call fast float @llvm.vp.reduce.fadd.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef)
+  %NXV1_vp = call fast float @llvm.vp.reduce.fadd.nxv1f32(float 0.0, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+  %NXV2_vp = call fast float @llvm.vp.reduce.fadd.nxv2f32(float 0.0, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+  %NXV4_vp = call fast float @llvm.vp.reduce.fadd.nxv4f32(float 0.0, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+  %NXV8_vp = call fast float @llvm.vp.reduce.fadd.nxv8f32(float 0.0, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+  %NXV16_vp = call fast float @llvm.vp.reduce.fadd.nxv16f32(float 0.0, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+  %NXV32_vp = call fast float @llvm.vp.reduce.fadd.nxv32f32(float 0.0, <vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
   ret void
 }
 
@@ -187,6 +330,20 @@ define void @reduce_fadd_double() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV2 = call fast double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV4 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NXV8 = call fast double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast double @llvm.vp.reduce.fadd.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast double @llvm.vp.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast double @llvm.vp.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call fast double @llvm.vp.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call fast double @llvm.vp.reduce.fadd.nxv16f64(double 0.000000e+00, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call fast double @llvm.vp.reduce.fadd.nxv32f64(double 0.000000e+00, <vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fadd_double'
@@ -202,6 +359,20 @@ define void @reduce_fadd_double() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call fast double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call fast double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast double @llvm.vp.reduce.fadd.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast double @llvm.vp.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast double @llvm.vp.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call fast double @llvm.vp.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call fast double @llvm.vp.reduce.fadd.nxv16f64(double 0.000000e+00, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call fast double @llvm.vp.reduce.fadd.nxv32f64(double 0.000000e+00, <vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef)
@@ -216,42 +387,27 @@ define void @reduce_fadd_double() {
   %NXV2 = call fast double @llvm.vector.reduce.fadd.nxv2f64(double 0.0, <vscale x 2 x double> undef)
   %NXV4 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.0, <vscale x 4 x double> undef)
   %NXV8 = call fast double @llvm.vector.reduce.fadd.nxv8f64(double 0.0, <vscale x 8 x double> undef)
+
+  %V1_vp = call fast double @llvm.vp.reduce.fadd.v1f64(double 0.0, <1 x double> undef, <1 x i1> undef, i32 undef)
+  %V2_vp = call fast double @llvm.vp.reduce.fadd.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef)
+  %V4_vp = call fast double @llvm.vp.reduce.fadd.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef)
+  %V8_vp = call fast double @llvm.vp.reduce.fadd.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef)
+  %V16_vp = call fast double @llvm.vp.reduce.fadd.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef)
+  %v32_vp = call fast double @llvm.vp.reduce.fadd.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef)
+  %V64_vp = call fast double @llvm.vp.reduce.fadd.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call fast double @llvm.vp.reduce.fadd.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef)
+  %NXV1_vp = call fast double @llvm.vp.reduce.fadd.nxv1f64(double 0.0, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+  %NXV2_vp = call fast double @llvm.vp.reduce.fadd.nxv2f64(double 0.0, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+  %NXV4_vp = call fast double @llvm.vp.reduce.fadd.nxv4f64(double 0.0, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+  %NXV8_vp = call fast double @llvm.vp.reduce.fadd.nxv8f64(double 0.0, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+  %NXV16_vp = call fast double @llvm.vp.reduce.fadd.nxv16f64(double 0.0, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+  %NXV32_vp = call fast double @llvm.vp.reduce.fadd.nxv32f64(double 0.0, <vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
   ret void
 }
 
 define void @reduce_ordered_fadd_bfloat() {
 ; FP-REDUCE-LABEL: 'reduce_ordered_fadd_bfloat'
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8 = call bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16 = call bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32 = call bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %V64 = call bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 510 for instruction: %V128 = call bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; SIZE-LABEL: 'reduce_ordered_fadd_bfloat'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2 = call bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V8 = call bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V16 = call bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %v32 = call bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 191 for instruction: %V64 = call bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 382 for instruction: %V128 = call bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0.0, <1 x bfloat> undef)
@@ -268,43 +424,25 @@ define void @reduce_ordered_fadd_bfloat() {
   %NXV8 = call bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0.0, <vscale x 8 x bfloat> undef)
   %NXV16 = call bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0.0, <vscale x 16 x bfloat> undef)
   %NXV32 = call bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0.0, <vscale x 32 x bfloat> undef)
+
+  %V1_vp = call bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0.0, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+  %V2_vp = call bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0.0, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+  %V4_vp = call bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0.0, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+  %V8_vp = call bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0.0, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+  %V16_vp = call bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0.0, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+  %v32_vp = call bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0.0, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+  %V64_vp = call bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0.0, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0.0, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+  %NXV1_vp = call bfloat @llvm.vp.reduce.fadd.nxv1bf16(bfloat 0.0, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+  %NXV2_vp = call bfloat @llvm.vp.reduce.fadd.nxv2bf16(bfloat 0.0, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+  %NXV4_vp = call bfloat @llvm.vp.reduce.fadd.nxv4bf16(bfloat 0.0, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+  %NXV8_vp = call bfloat @llvm.vp.reduce.fadd.nxv8bf16(bfloat 0.0, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+  %NXV16_vp = call bfloat @llvm.vp.reduce.fadd.nxv16bf16(bfloat 0.0, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+  %NXV32_vp = call bfloat @llvm.vp.reduce.fadd.nxv32bf16(bfloat 0.0, <vscale x 32 x bfloat> undef, <vscale x 32 x i1> undef, i32 undef)
   ret void
 }
 
 define void @reduce_ordered_fadd_half() {
-; FP-REDUCE-ZVFH-LABEL: 'reduce_ordered_fadd_half'
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV1 = call half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NXV2 = call half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NXV4 = call half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV8 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %NXV16 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %NXV32 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; FP-REDUCE-ZVFHMIN-LABEL: 'reduce_ordered_fadd_half'
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 510 for instruction: %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_ordered_fadd_half'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef)
@@ -321,6 +459,20 @@ define void @reduce_ordered_fadd_half() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV32 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call half @llvm.vp.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call half @llvm.vp.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call half @llvm.vp.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef)
@@ -337,6 +489,21 @@ define void @reduce_ordered_fadd_half() {
   %NXV8 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0.0, <vscale x 8 x half> undef)
   %NXV16 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0.0, <vscale x 16 x half> undef)
   %NXV32 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0.0, <vscale x 32 x half> undef)
+
+  %V1_vp = call half @llvm.vp.reduce.fadd.v1f16(half 0.0, <1 x half> undef, <1 x i1> undef, i32 undef)
+  %V2_vp = call half @llvm.vp.reduce.fadd.v2f16(half 0.0, <2 x half> undef, <2 x i1> undef, i32 undef)
+  %V4_vp = call half @llvm.vp.reduce.fadd.v4f16(half 0.0, <4 x half> undef, <4 x i1> undef, i32 undef)
+  %V8_vp = call half @llvm.vp.reduce.fadd.v8f16(half 0.0, <8 x half> undef, <8 x i1> undef, i32 undef)
+  %V16_vp = call half @llvm.vp.reduce.fadd.v16f16(half 0.0, <16 x half> undef, <16 x i1> undef, i32 undef)
+  %v32_vp = call half @llvm.vp.reduce.fadd.v32f16(half 0.0, <32 x half> undef, <32 x i1> undef, i32 undef)
+  %V64_vp = call half @llvm.vp.reduce.fadd.v64f16(half 0.0, <64 x half> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call half @llvm.vp.reduce.fadd.v128f16(half 0.0, <128 x half> undef, <128 x i1> undef, i32 undef)
+  %NXV1_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half 0.0, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+  %NXV2_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half 0.0, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+  %NXV4_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half 0.0, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+  %NXV8_vp = call half @llvm.vp.reduce.fadd.nxv8f16(half 0.0, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+  %NXV16_vp = call half @llvm.vp.reduce.fadd.nxv16f16(half 0.0, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+  %NXV32_vp = call half @llvm.vp.reduce.fadd.nxv32f16(half 0.0, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
   ret void
 }
 
@@ -355,6 +522,20 @@ define void @reduce_ordered_fadd_float() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NXV4 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV8 = call float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %NXV16 = call float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call float @llvm.vp.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call float @llvm.vp.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call float @llvm.vp.reduce.fadd.nxv32f32(float 0.000000e+00, <vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_ordered_fadd_float'
@@ -371,6 +552,20 @@ define void @reduce_ordered_fadd_float() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call float @llvm.vp.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call float @llvm.vp.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call float @llvm.vp.reduce.fadd.nxv32f32(float 0.000000e+00, <vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef)
@@ -386,6 +581,21 @@ define void @reduce_ordered_fadd_float() {
   %NXV4 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.0, <vscale x 4 x float> undef)
   %NXV8 = call float @llvm.vector.reduce.fadd.nxv8f32(float 0.0, <vscale x 8 x float> undef)
   %NXV16 = call float @llvm.vector.reduce.fadd.nxv16f32(float 0.0, <vscale x 16 x float> undef)
+
+  %V1_vp = call float @llvm.vp.reduce.fadd.v1f32(float 0.0, <1 x float> undef, <1 x i1> undef, i32 undef)
+  %V2_vp = call float @llvm.vp.reduce.fadd.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef)
+  %V4_vp = call float @llvm.vp.reduce.fadd.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef)
+  %V8_vp = call float @llvm.vp.reduce.fadd.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef)
+  %V16_vp = call float @llvm.vp.reduce.fadd.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef)
+  %v32_vp = call float @llvm.vp.reduce.fadd.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef)
+  %V64_vp = call float @llvm.vp.reduce.fadd.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call float @llvm.vp.reduce.fadd.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef)
+  %NXV1_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float 0.0, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+  %NXV2_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float 0.0, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+  %NXV4_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float 0.0, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+  %NXV8_vp = call float @llvm.vp.reduce.fadd.nxv8f32(float 0.0, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+  %NXV16_vp = call float @llvm.vp.reduce.fadd.nxv16f32(float 0.0, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+  %NXV32_vp = call float @llvm.vp.reduce.fadd.nxv32f32(float 0.0, <vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
   ret void
 }
 
@@ -403,6 +613,20 @@ define void @reduce_ordered_fadd_double() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NXV2 = call double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NXV4 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV8 = call double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %v32_vp = call double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %V64_vp = call double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %V128_vp = call double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call double @llvm.vp.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call double @llvm.vp.reduce.fadd.nxv16f64(double 0.000000e+00, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call double @llvm.vp.reduce.fadd.nxv32f64(double 0.000000e+00, <vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_ordered_fadd_double'
@@ -418,6 +642,20 @@ define void @reduce_ordered_fadd_double() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %v32_vp = call double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %V64_vp = call double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %V128_vp = call double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call double @llvm.vp.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call double @llvm.vp.reduce.fadd.nxv16f64(double 0.000000e+00, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call double @llvm.vp.reduce.fadd.nxv32f64(double 0.000000e+00, <vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef)
@@ -432,5 +670,20 @@ define void @reduce_ordered_fadd_double() {
   %NXV2 = call double @llvm.vector.reduce.fadd.nxv2f64(double 0.0, <vscale x 2 x double> undef)
   %NXV4 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.0, <vscale x 4 x double> undef)
   %NXV8 = call double @llvm.vector.reduce.fadd.nxv8f64(double 0.0, <vscale x 8 x double> undef)
+
+  %V1_vp = call double @llvm.vp.reduce.fadd.v1f64(double 0.0, <1 x double> undef, <1 x i1> undef, i32 undef)
+  %V2_vp = call double @llvm.vp.reduce.fadd.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef)
+  %V4_vp = call double @llvm.vp.reduce.fadd.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef)
+  %V8_vp = call double @llvm.vp.reduce.fadd.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef)
+  %V16_vp = call double @llvm.vp.reduce.fadd.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef)
+  %v32_vp = call double @llvm.vp.reduce.fadd.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef)
+  %V64_vp = call double @llvm.vp.reduce.fadd.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call double @llvm.vp.reduce.fadd.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef)
+  %NXV1_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double 0.0, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+  %NXV2_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double 0.0, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+  %NXV4_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double 0.0, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+  %NXV8_vp = call double @llvm.vp.reduce.fadd.nxv8f64(double 0.0, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+  %NXV16_vp = call double @llvm.vp.reduce.fadd.nxv16f64(double 0.0, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+  %NXV32_vp = call double @llvm.vp.reduce.fadd.nxv32f64(double 0.0, <vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
   ret void
 }
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
index b14c60012077de..dedeb4be67ae82 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
@@ -1,8 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
 
 define float @reduce_fmaximum_f32(float %arg) {
 ; CHECK-LABEL: 'reduce_fmaximum_f32'
@@ -20,6 +22,34 @@ define float @reduce_fmaximum_f32(float %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %5 = call fast float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %6 = call fast float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %7 = call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %8 = call fast float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %9 = call fast float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %10 = call fast float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %11 = call fast float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %12 = call fast float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %13 = call fast float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %14 = call fast float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %15 = call float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %16 = call float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %17 = call float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %18 = call float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %19 = call float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %20 = call float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %21 = call float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %22 = call fast float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %23 = call fast float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %24 = call fast float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %25 = call fast float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %26 = call fast float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %27 = call fast float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %28 = call fast float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float undef
 ;
 ; SIZE-LABEL: 'reduce_fmaximum_f32'
@@ -37,6 +67,34 @@ define float @reduce_fmaximum_f32(float %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call fast float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = call fast float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %8 = call fast float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %9 = call fast float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %10 = call fast float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %11 = call fast float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %12 = call fast float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %13 = call fast float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %14 = call fast float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %15 = call float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %16 = call float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %17 = call float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %18 = call float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %19 = call float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %20 = call float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %21 = call float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %22 = call fast float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %23 = call fast float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %24 = call fast float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %25 = call fast float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %26 = call fast float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %27 = call fast float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %28 = call fast float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float undef
 ;
 %V2   = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
@@ -53,6 +111,36 @@ call fast float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
 call fast float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
 call fast float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
 call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+
+%V2_vp   = call float @llvm.vp.reduce.fmaximum.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef)
+%V4_vp   = call float @llvm.vp.reduce.fmaximum.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef)
+%V8_vp   = call float @llvm.vp.reduce.fmaximum.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef)
+%V16_vp   = call float @llvm.vp.reduce.fmaximum.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef)
+%V32_vp   = call float @llvm.vp.reduce.fmaximum.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef)
+%V64_vp   = call float @llvm.vp.reduce.fmaximum.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef)
+%V128_vp   = call float @llvm.vp.reduce.fmaximum.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmaximum.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmaximum.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmaximum.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmaximum.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmaximum.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmaximum.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmaximum.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef)
+
+call float @llvm.vp.reduce.fmax.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmax.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmax.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmax.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmax.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmax.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmax.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmax.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmax.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmax.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmax.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmax.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmax.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmax.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef)
 ret float undef
 }
 declare float @llvm.vector.reduce.fmaximum.v2f32(<2 x float>)
@@ -77,6 +165,34 @@ define double @reduce_fmaximum_f64(double %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %4 = call fast double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %5 = call fast double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %6 = call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %7 = call double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %8 = call double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %9 = call double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %10 = call double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %11 = call double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %12 = call double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %13 = call double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %14 = call fast double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %15 = call fast double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %16 = call fast double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %17 = call fast double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %18 = call fast double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %19 = call fast double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %20 = call fast double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %21 = call double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %22 = call double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %23 = call double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %24 = call double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %25 = call double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %26 = call double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %27 = call double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %28 = call fast double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %29 = call fast double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %30 = call fast double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %31 = call fast double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %32 = call fast double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %33 = call fast double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %34 = call fast double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double undef
 ;
 ; SIZE-LABEL: 'reduce_fmaximum_f64'
@@ -92,6 +208,34 @@ define double @reduce_fmaximum_f64(double %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = call fast double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call fast double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %7 = call double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %8 = call double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %9 = call double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %10 = call double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %11 = call double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %12 = call double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %13 = call double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %14 = call fast double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %15 = call fast double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %16 = call fast double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %17 = call fast double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %18 = call fast double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %19 = call fast double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %20 = call fast double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %21 = call double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %22 = call double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %23 = call double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %24 = call double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %25 = call double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %26 = call double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %27 = call double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %28 = call fast double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %29 = call fast double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %30 = call fast double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %31 = call fast double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %32 = call fast double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %33 = call fast double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %34 = call fast double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double undef
 ;
 %V2   = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
@@ -106,6 +250,36 @@ call fast double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
 call fast double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
 call fast double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
 call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+
+call double @llvm.vp.reduce.fmaximum.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmaximum.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmaximum.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmaximum.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmaximum.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmaximum.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmaximum.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmaximum.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmaximum.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmaximum.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmaximum.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmaximum.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmaximum.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmaximum.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef)
+
+call double @llvm.vp.reduce.fmax.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmax.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmax.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmax.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmax.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmax.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmax.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmax.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmax.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmax.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmax.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmax.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmax.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmax.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef)
 ret double undef
 }
 declare double @llvm.vector.reduce.fmaximum.v2f64(<2 x double>)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
index 2172a85bc46aaf..6d51911b4fc408 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
@@ -1,8 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
 
 define float @reduce_fmaximum_f32(float %arg) {
 ; CHECK-LABEL: 'reduce_fmaximum_f32'
@@ -13,6 +15,20 @@ define float @reduce_fmaximum_f32(float %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fminimum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fminimum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fminimum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fminimum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call float @llvm.vp.reduce.fminimum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call float @llvm.vp.reduce.fminimum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call float @llvm.vp.reduce.fminimum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = call float @llvm.vp.reduce.fmin.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %2 = call float @llvm.vp.reduce.fmin.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %3 = call float @llvm.vp.reduce.fmin.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %4 = call float @llvm.vp.reduce.fmin.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %5 = call float @llvm.vp.reduce.fmin.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %6 = call float @llvm.vp.reduce.fmin.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %7 = call float @llvm.vp.reduce.fmin.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float undef
 ;
 ; SIZE-LABEL: 'reduce_fmaximum_f32'
@@ -23,6 +39,20 @@ define float @reduce_fmaximum_f32(float %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fminimum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fminimum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fminimum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fminimum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call float @llvm.vp.reduce.fminimum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call float @llvm.vp.reduce.fminimum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call float @llvm.vp.reduce.fminimum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = call float @llvm.vp.reduce.fmin.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %2 = call float @llvm.vp.reduce.fmin.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %3 = call float @llvm.vp.reduce.fmin.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %4 = call float @llvm.vp.reduce.fmin.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %5 = call float @llvm.vp.reduce.fmin.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %6 = call float @llvm.vp.reduce.fmin.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %7 = call float @llvm.vp.reduce.fmin.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float undef
 ;
 %V2   = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
@@ -32,6 +62,22 @@ define float @reduce_fmaximum_f32(float %arg) {
 %V32   = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
 %V64   = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
 %V128   = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
+
+%V2_vp   = call float @llvm.vp.reduce.fminimum.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef)
+%V4_vp   = call float @llvm.vp.reduce.fminimum.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef)
+%V8_vp   = call float @llvm.vp.reduce.fminimum.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef)
+%V16_vp   = call float @llvm.vp.reduce.fminimum.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef)
+%V32_vp   = call float @llvm.vp.reduce.fminimum.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef)
+%V64_vp   = call float @llvm.vp.reduce.fminimum.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef)
+%V128_vp   = call float @llvm.vp.reduce.fminimum.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef)
+
+call float @llvm.vp.reduce.fmin.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmin.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmin.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmin.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmin.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmin.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmin.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef)
 ret float undef
 }
 declare float @llvm.vector.reduce.fminimum.v2f32(<2 x float>)
@@ -50,6 +96,20 @@ define double @reduce_fmaximum_f64(double %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fminimum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fminimum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fminimum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fminimum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %V32_vp = call double @llvm.vp.reduce.fminimum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %V64_vp = call double @llvm.vp.reduce.fminimum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %V128_vp = call double @llvm.vp.reduce.fminimum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = call double @llvm.vp.reduce.fmin.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %2 = call double @llvm.vp.reduce.fmin.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %3 = call double @llvm.vp.reduce.fmin.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %4 = call double @llvm.vp.reduce.fmin.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %5 = call double @llvm.vp.reduce.fmin.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %6 = call double @llvm.vp.reduce.fmin.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %7 = call double @llvm.vp.reduce.fmin.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double undef
 ;
 ; SIZE-LABEL: 'reduce_fmaximum_f64'
@@ -59,6 +119,20 @@ define double @reduce_fmaximum_f64(double %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fminimum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fminimum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fminimum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fminimum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %V32_vp = call double @llvm.vp.reduce.fminimum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %V64_vp = call double @llvm.vp.reduce.fminimum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %V128_vp = call double @llvm.vp.reduce.fminimum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = call double @llvm.vp.reduce.fmin.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %2 = call double @llvm.vp.reduce.fmin.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %3 = call double @llvm.vp.reduce.fmin.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %4 = call double @llvm.vp.reduce.fmin.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %5 = call double @llvm.vp.reduce.fmin.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %6 = call double @llvm.vp.reduce.fmin.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %7 = call double @llvm.vp.reduce.fmin.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double undef
 ;
 %V2   = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
@@ -67,6 +141,22 @@ define double @reduce_fmaximum_f64(double %arg) {
 %V16   = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
 %V32   = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
 %V64   = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
+
+%V2_vp   = call double @llvm.vp.reduce.fminimum.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef)
+%V4_vp   = call double @llvm.vp.reduce.fminimum.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef)
+%V8_vp   = call double @llvm.vp.reduce.fminimum.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef)
+%V16_vp   = call double @llvm.vp.reduce.fminimum.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef)
+%V32_vp   = call double @llvm.vp.reduce.fminimum.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef)
+%V64_vp   = call double @llvm.vp.reduce.fminimum.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef)
+%V128_vp   = call double @llvm.vp.reduce.fminimum.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef)
+
+call double @llvm.vp.reduce.fmin.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmin.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmin.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmin.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmin.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmin.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmin.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef)
 ret double undef
 }
 declare double @llvm.vector.reduce.fminimum.v2f64(<2 x double>)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll
index 211bcb1343eea4..dc43a54ff1855b 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFH
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFHMIN
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s  --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFH
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFHMIN
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s  --check-prefix=SIZE
 
 define void @reduce_fmul_bfloat() {
 ; FP-REDUCE-LABEL: 'reduce_fmul_bfloat'
@@ -19,6 +19,20 @@ define void @reduce_fmul_bfloat() {
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef, <vscale x 32 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fmul_bfloat'
@@ -36,6 +50,20 @@ define void @reduce_fmul_bfloat() {
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef, <vscale x 32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast bfloat @llvm.vector.reduce.fmul.v1bf16(bfloat 0.0, <1 x bfloat> undef)
@@ -52,6 +80,21 @@ define void @reduce_fmul_bfloat() {
   %NXV8 = call fast bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0.0, <vscale x 8 x bfloat> undef)
   %NXV16 = call fast bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0.0, <vscale x 16 x bfloat> undef)
   %NXV32 = call fast bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0.0, <vscale x 32 x bfloat> undef)
+
+  %V1_vp = call fast bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0.0, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+  %V2_vp = call fast bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0.0, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+  %V4_vp = call fast bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0.0, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+  %V8_vp = call fast bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0.0, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+  %V16_vp = call fast bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0.0, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+  %v32_vp = call fast bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0.0, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+  %V64_vp = call fast bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0.0, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call fast bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0.0, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+  %NXV1_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv1bf16(bfloat 0.0, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+  %NXV2_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv2bf16(bfloat 0.0, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+  %NXV4_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv4bf16(bfloat 0.0, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+  %NXV8_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv8bf16(bfloat 0.0, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+  %NXV16_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv16bf16(bfloat 0.0, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+  %NXV32_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv32bf16(bfloat 0.0, <vscale x 32 x bfloat> undef, <vscale x 32 x i1> undef, i32 undef)
   ret void
 }
 
@@ -71,6 +114,20 @@ define void @reduce_fmul_half() {
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast half @llvm.vp.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast half @llvm.vp.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast half @llvm.vp.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call fast half @llvm.vp.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call fast half @llvm.vp.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call fast half @llvm.vp.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; FP-REDUCE-ZVFHMIN-LABEL: 'reduce_fmul_half'
@@ -88,6 +145,20 @@ define void @reduce_fmul_half() {
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast half @llvm.vp.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast half @llvm.vp.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast half @llvm.vp.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call fast half @llvm.vp.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call fast half @llvm.vp.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call fast half @llvm.vp.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fmul_half'
@@ -105,6 +176,20 @@ define void @reduce_fmul_half() {
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast half @llvm.vp.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast half @llvm.vp.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast half @llvm.vp.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call fast half @llvm.vp.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call fast half @llvm.vp.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call fast half @llvm.vp.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast half @llvm.vector.reduce.fmul.v1f16(half 0.0, <1 x half> undef)
@@ -121,6 +206,21 @@ define void @reduce_fmul_half() {
   %NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0.0, <vscale x 8 x half> undef)
   %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0.0, <vscale x 16 x half> undef)
   %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0.0, <vscale x 32 x half> undef)
+
+  %V1_vp = call fast half @llvm.vp.reduce.fmul.v1f16(half 0.0, <1 x half> undef, <1 x i1> undef, i32 undef)
+  %V2_vp = call fast half @llvm.vp.reduce.fmul.v2f16(half 0.0, <2 x half> undef, <2 x i1> undef, i32 undef)
+  %V4_vp = call fast half @llvm.vp.reduce.fmul.v4f16(half 0.0, <4 x half> undef, <4 x i1> undef, i32 undef)
+  %V8_vp = call fast half @llvm.vp.reduce.fmul.v8f16(half 0.0, <8 x half> undef, <8 x i1> undef, i32 undef)
+  %V16_vp = call fast half @llvm.vp.reduce.fmul.v16f16(half 0.0, <16 x half> undef, <16 x i1> undef, i32 undef)
+  %v32_vp = call fast half @llvm.vp.reduce.fmul.v32f16(half 0.0, <32 x half> undef, <32 x i1> undef, i32 undef)
+  %V64_vp = call fast half @llvm.vp.reduce.fmul.v64f16(half 0.0, <64 x half> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call fast half @llvm.vp.reduce.fmul.v128f16(half 0.0, <128 x half> undef, <128 x i1> undef, i32 undef)
+  %NXV1_vp = call fast half @llvm.vp.reduce.fmul.nxv1f16(half 0.0, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+  %NXV2_vp = call fast half @llvm.vp.reduce.fmul.nxv2f16(half 0.0, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+  %NXV4_vp = call fast half @llvm.vp.reduce.fmul.nxv4f16(half 0.0, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+  %NXV8_vp = call fast half @llvm.vp.reduce.fmul.nxv8f16(half 0.0, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+  %NXV16_vp = call fast half @llvm.vp.reduce.fmul.nxv16f16(half 0.0, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+  %NXV32_vp = call fast half @llvm.vp.reduce.fmul.nxv32f16(half 0.0, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
   ret void
 }
 
@@ -139,6 +239,20 @@ define void @reduce_fmul_float() {
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast float @llvm.vp.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast float @llvm.vp.reduce.fmul.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast float @llvm.vp.reduce.fmul.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast float @llvm.vp.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call fast float @llvm.vp.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call fast float @llvm.vp.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call fast float @llvm.vp.reduce.fmul.nxv32f32(float 0.000000e+00, <vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fmul_float'
@@ -155,6 +269,20 @@ define void @reduce_fmul_float() {
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast float @llvm.vp.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast float @llvm.vp.reduce.fmul.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast float @llvm.vp.reduce.fmul.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast float @llvm.vp.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call fast float @llvm.vp.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call fast float @llvm.vp.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call fast float @llvm.vp.reduce.fmul.nxv32f32(float 0.000000e+00, <vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast float @llvm.vector.reduce.fmul.v1f32(float 0.0, <1 x float> undef)
@@ -170,6 +298,21 @@ define void @reduce_fmul_float() {
   %NXV4 = call fast float @llvm.vector.reduce.fmul.nxv4f32(float 0.0, <vscale x 4 x float> undef)
   %NXV8 = call fast float @llvm.vector.reduce.fmul.nxv8f32(float 0.0, <vscale x 8 x float> undef)
   %NXV16 = call fast float @llvm.vector.reduce.fmul.nxv16f32(float 0.0, <vscale x 16 x float> undef)
+
+  %V1_vp = call fast float @llvm.vp.reduce.fmul.v1f32(float 0.0, <1 x float> undef, <1 x i1> undef, i32 undef)
+  %V2_vp = call fast float @llvm.vp.reduce.fmul.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef)
+  %V4_vp = call fast float @llvm.vp.reduce.fmul.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef)
+  %V8_vp = call fast float @llvm.vp.reduce.fmul.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef)
+  %V16_vp = call fast float @llvm.vp.reduce.fmul.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef)
+  %v32_vp = call fast float @llvm.vp.reduce.fmul.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef)
+  %V64_vp = call fast float @llvm.vp.reduce.fmul.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call fast float @llvm.vp.reduce.fmul.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef)
+  %NXV1_vp = call fast float @llvm.vp.reduce.fmul.nxv1f32(float 0.0, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+  %NXV2_vp = call fast float @llvm.vp.reduce.fmul.nxv2f32(float 0.0, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+  %NXV4_vp = call fast float @llvm.vp.reduce.fmul.nxv4f32(float 0.0, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+  %NXV8_vp = call fast float @llvm.vp.reduce.fmul.nxv8f32(float 0.0, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+  %NXV16_vp = call fast float @llvm.vp.reduce.fmul.nxv16f32(float 0.0, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+  %NXV32_vp = call fast float @llvm.vp.reduce.fmul.nxv32f32(float 0.0, <vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
   ret void
 }
 
@@ -187,6 +330,20 @@ define void @reduce_fmul_double() {
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call fast double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast double @llvm.vp.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast double @llvm.vp.reduce.fmul.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast double @llvm.vp.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast double @llvm.vp.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call fast double @llvm.vp.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call fast double @llvm.vp.reduce.fmul.nxv16f64(double 0.000000e+00, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call fast double @llvm.vp.reduce.fmul.nxv32f64(double 0.000000e+00, <vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fmul_double'
@@ -202,6 +359,20 @@ define void @reduce_fmul_double() {
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call fast double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast double @llvm.vp.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast double @llvm.vp.reduce.fmul.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast double @llvm.vp.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast double @llvm.vp.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call fast double @llvm.vp.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call fast double @llvm.vp.reduce.fmul.nxv16f64(double 0.000000e+00, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call fast double @llvm.vp.reduce.fmul.nxv32f64(double 0.000000e+00, <vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast double @llvm.vector.reduce.fmul.v1f64(double 0.0, <1 x double> undef)
@@ -216,6 +387,21 @@ define void @reduce_fmul_double() {
   %NXV2 = call fast double @llvm.vector.reduce.fmul.nxv2f64(double 0.0, <vscale x 2 x double> undef)
   %NXV4 = call fast double @llvm.vector.reduce.fmul.nxv4f64(double 0.0, <vscale x 4 x double> undef)
   %NXV8 = call fast double @llvm.vector.reduce.fmul.nxv8f64(double 0.0, <vscale x 8 x double> undef)
+
+  %V1_vp = call fast double @llvm.vp.reduce.fmul.v1f64(double 0.0, <1 x double> undef, <1 x i1> undef, i32 undef)
+  %V2_vp = call fast double @llvm.vp.reduce.fmul.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef)
+  %V4_vp = call fast double @llvm.vp.reduce.fmul.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef)
+  %V8_vp = call fast double @llvm.vp.reduce.fmul.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef)
+  %V16_vp = call fast double @llvm.vp.reduce.fmul.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef)
+  %v32_vp = call fast double @llvm.vp.reduce.fmul.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef)
+  %V64_vp = call fast double @llvm.vp.reduce.fmul.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call fast double @llvm.vp.reduce.fmul.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef)
+  %NXV1_vp = call fast double @llvm.vp.reduce.fmul.nxv1f64(double 0.0, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+  %NXV2_vp = call fast double @llvm.vp.reduce.fmul.nxv2f64(double 0.0, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+  %NXV4_vp = call fast double @llvm.vp.reduce.fmul.nxv4f64(double 0.0, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+  %NXV8_vp = call fast double @llvm.vp.reduce.fmul.nxv8f64(double 0.0, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+  %NXV16_vp = call fast double @llvm.vp.reduce.fmul.nxv16f64(double 0.0, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+  %NXV32_vp = call fast double @llvm.vp.reduce.fmul.nxv32f64(double 0.0, <vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
   ret void
 }
 
@@ -235,6 +421,20 @@ define void @reduce_ordered_fmul_bfloat() {
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call bfloat @llvm.vp.reduce.fmul.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call bfloat @llvm.vp.reduce.fmul.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call bfloat @llvm.vp.reduce.fmul.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call bfloat @llvm.vp.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call bfloat @llvm.vp.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call bfloat @llvm.vp.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef, <vscale x 32 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_ordered_fmul_bfloat'
@@ -252,6 +452,20 @@ define void @reduce_ordered_fmul_bfloat() {
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call bfloat @llvm.vp.reduce.fmul.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call bfloat @llvm.vp.reduce.fmul.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call bfloat @llvm.vp.reduce.fmul.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call bfloat @llvm.vp.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call bfloat @llvm.vp.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call bfloat @llvm.vp.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef, <vscale x 32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call bfloat @llvm.vector.reduce.fmul.v1bf16(bfloat 0.0, <1 x bfloat> undef)
@@ -268,6 +482,21 @@ define void @reduce_ordered_fmul_bfloat() {
   %NXV8 = call bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0.0, <vscale x 8 x bfloat> undef)
   %NXV16 = call bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0.0, <vscale x 16 x bfloat> undef)
   %NXV32 = call bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0.0, <vscale x 32 x bfloat> undef)
+
+  %V1_vp = call bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0.0, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+  %V2_vp = call bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0.0, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+  %V4_vp = call bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0.0, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+  %V8_vp = call bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0.0, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+  %V16_vp = call bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0.0, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+  %v32_vp = call bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0.0, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+  %V64_vp = call bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0.0, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0.0, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+  %NXV1_vp = call bfloat @llvm.vp.reduce.fmul.nxv1bf16(bfloat 0.0, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+  %NXV2_vp = call bfloat @llvm.vp.reduce.fmul.nxv2bf16(bfloat 0.0, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+  %NXV4_vp = call bfloat @llvm.vp.reduce.fmul.nxv4bf16(bfloat 0.0, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+  %NXV8_vp = call bfloat @llvm.vp.reduce.fmul.nxv8bf16(bfloat 0.0, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+  %NXV16_vp = call bfloat @llvm.vp.reduce.fmul.nxv16bf16(bfloat 0.0, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+  %NXV32_vp = call bfloat @llvm.vp.reduce.fmul.nxv32bf16(bfloat 0.0, <vscale x 32 x bfloat> undef, <vscale x 32 x i1> undef, i32 undef)
   ret void
 }
 
@@ -287,6 +516,20 @@ define void @reduce_ordered_fmul_half() {
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call half @llvm.vp.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call half @llvm.vp.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call half @llvm.vp.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call half @llvm.vp.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call half @llvm.vp.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call half @llvm.vp.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_ordered_fmul_half'
@@ -304,6 +547,20 @@ define void @reduce_ordered_fmul_half() {
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call half @llvm.vp.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call half @llvm.vp.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call half @llvm.vp.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call half @llvm.vp.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call half @llvm.vp.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call half @llvm.vp.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call half @llvm.vector.reduce.fmul.v1f16(half 0.0, <1 x half> undef)
@@ -320,6 +577,21 @@ define void @reduce_ordered_fmul_half() {
   %NXV8 = call half @llvm.vector.reduce.fmul.nxv8f16(half 0.0, <vscale x 8 x half> undef)
   %NXV16 = call half @llvm.vector.reduce.fmul.nxv16f16(half 0.0, <vscale x 16 x half> undef)
   %NXV32 = call half @llvm.vector.reduce.fmul.nxv32f16(half 0.0, <vscale x 32 x half> undef)
+
+  %V1_vp = call half @llvm.vp.reduce.fmul.v1f16(half 0.0, <1 x half> undef, <1 x i1> undef, i32 undef)
+  %V2_vp = call half @llvm.vp.reduce.fmul.v2f16(half 0.0, <2 x half> undef, <2 x i1> undef, i32 undef)
+  %V4_vp = call half @llvm.vp.reduce.fmul.v4f16(half 0.0, <4 x half> undef, <4 x i1> undef, i32 undef)
+  %V8_vp = call half @llvm.vp.reduce.fmul.v8f16(half 0.0, <8 x half> undef, <8 x i1> undef, i32 undef)
+  %V16_vp = call half @llvm.vp.reduce.fmul.v16f16(half 0.0, <16 x half> undef, <16 x i1> undef, i32 undef)
+  %v32_vp = call half @llvm.vp.reduce.fmul.v32f16(half 0.0, <32 x half> undef, <32 x i1> undef, i32 undef)
+  %V64_vp = call half @llvm.vp.reduce.fmul.v64f16(half 0.0, <64 x half> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call half @llvm.vp.reduce.fmul.v128f16(half 0.0, <128 x half> undef, <128 x i1> undef, i32 undef)
+  %NXV1_vp = call half @llvm.vp.reduce.fmul.nxv1f16(half 0.0, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+  %NXV2_vp = call half @llvm.vp.reduce.fmul.nxv2f16(half 0.0, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+  %NXV4_vp = call half @llvm.vp.reduce.fmul.nxv4f16(half 0.0, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+  %NXV8_vp = call half @llvm.vp.reduce.fmul.nxv8f16(half 0.0, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+  %NXV16_vp = call half @llvm.vp.reduce.fmul.nxv16f16(half 0.0, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+  %NXV32_vp = call half @llvm.vp.reduce.fmul.nxv32f16(half 0.0, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
   ret void
 }
 
@@ -338,6 +610,19 @@ define void @reduce_ordered_fmul_float() {
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call float @llvm.vp.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call float @llvm.vp.reduce.fmul.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call float @llvm.vp.reduce.fmul.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call float @llvm.vp.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call float @llvm.vp.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call float @llvm.vp.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_ordered_fmul_float'
@@ -354,6 +639,19 @@ define void @reduce_ordered_fmul_float() {
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call float @llvm.vp.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call float @llvm.vp.reduce.fmul.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call float @llvm.vp.reduce.fmul.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call float @llvm.vp.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call float @llvm.vp.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call float @llvm.vp.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call float @llvm.vector.reduce.fmul.v1f32(float 0.0, <1 x float> undef)
@@ -369,6 +667,20 @@ define void @reduce_ordered_fmul_float() {
   %NXV4 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.0, <vscale x 4 x float> undef)
   %NXV8 = call float @llvm.vector.reduce.fmul.nxv8f32(float 0.0, <vscale x 8 x float> undef)
   %NXV16 = call float @llvm.vector.reduce.fmul.nxv16f32(float 0.0, <vscale x 16 x float> undef)
+
+  %V1_vp = call float @llvm.vp.reduce.fmul.v1f32(float 0.0, <1 x float> undef, <1 x i1> undef, i32 undef)
+  %V2_vp = call float @llvm.vp.reduce.fmul.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef)
+  %V4_vp = call float @llvm.vp.reduce.fmul.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef)
+  %V8_vp = call float @llvm.vp.reduce.fmul.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef)
+  %V16_vp = call float @llvm.vp.reduce.fmul.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef)
+  %v32_vp = call float @llvm.vp.reduce.fmul.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef)
+  %V64_vp = call float @llvm.vp.reduce.fmul.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call float @llvm.vp.reduce.fmul.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef)
+  %NXV1_vp = call float @llvm.vp.reduce.fmul.nxv1f32(float 0.0, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+  %NXV2_vp = call float @llvm.vp.reduce.fmul.nxv2f32(float 0.0, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+  %NXV4_vp = call float @llvm.vp.reduce.fmul.nxv4f32(float 0.0, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+  %NXV8_vp = call float @llvm.vp.reduce.fmul.nxv8f32(float 0.0, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+  %NXV16_vp = call float @llvm.vp.reduce.fmul.nxv16f32(float 0.0, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
   ret void
 }
 
@@ -386,6 +698,18 @@ define void @reduce_ordered_fmul_double() {
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call double @llvm.vp.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %v32_vp = call double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %V64_vp = call double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %V128_vp = call double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call double @llvm.vp.reduce.fmul.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call double @llvm.vp.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call double @llvm.vp.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call double @llvm.vp.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_ordered_fmul_double'
@@ -401,6 +725,18 @@ define void @reduce_ordered_fmul_double() {
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call double @llvm.vp.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %v32_vp = call double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %V64_vp = call double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %V128_vp = call double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call double @llvm.vp.reduce.fmul.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call double @llvm.vp.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call double @llvm.vp.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call double @llvm.vp.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call double @llvm.vector.reduce.fmul.v1f64(double 0.0, <1 x double> undef)
@@ -415,5 +751,18 @@ define void @reduce_ordered_fmul_double() {
   %NXV2 = call double @llvm.vector.reduce.fmul.nxv2f64(double 0.0, <vscale x 2 x double> undef)
   %NXV4 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.0, <vscale x 4 x double> undef)
   %NXV8 = call double @llvm.vector.reduce.fmul.nxv8f64(double 0.0, <vscale x 8 x double> undef)
+
+  %V1_vp = call double @llvm.vp.reduce.fmul.v1f64(double 0.0, <1 x double> undef, <1 x i1> undef, i32 undef)
+  %V2_vp = call double @llvm.vp.reduce.fmul.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef)
+  %V4_vp = call double @llvm.vp.reduce.fmul.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef)
+  %V8_vp = call double @llvm.vp.reduce.fmul.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef)
+  %V16_vp = call double @llvm.vp.reduce.fmul.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef)
+  %v32_vp = call double @llvm.vp.reduce.fmul.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef)
+  %V64_vp = call double @llvm.vp.reduce.fmul.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call double @llvm.vp.reduce.fmul.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef)
+  %NXV1_vp = call double @llvm.vp.reduce.fmul.nxv1f64(double 0.0, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+  %NXV2_vp = call double @llvm.vp.reduce.fmul.nxv2f64(double 0.0, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+  %NXV4_vp = call double @llvm.vp.reduce.fmul.nxv4f64(double 0.0, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+  %NXV8_vp = call double @llvm.vp.reduce.fmul.nxv8f64(double 0.0, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
   ret void
 }
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
index c5d677e574c13c..c037eb5bfc17f4 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
@@ -1,8 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
 
 define i32 @reduce_umin_i1(i32 %arg) {
 ; CHECK-LABEL: 'reduce_umin_i1'
@@ -14,6 +16,14 @@ define i32 @reduce_umin_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.umax.v32i1(<32 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.umax.v64i1(<64 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.umax.v128i1(<128 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umin_i1'
@@ -25,6 +35,14 @@ define i32 @reduce_umin_i1(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.umax.v32i1(<32 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.umax.v64i1(<64 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.umax.v128i1(<128 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.umax.v1i1(<1 x i1> undef)
@@ -35,6 +53,15 @@ define i32 @reduce_umin_i1(i32 %arg) {
   %V32  = call i1 @llvm.vector.reduce.umax.v32i1(<32 x i1> undef)
   %V64  = call i1 @llvm.vector.reduce.umax.v64i1(<64 x i1> undef)
   %V128 = call i1 @llvm.vector.reduce.umax.v128i1(<128 x i1> undef)
+
+  %V1_vp   = call i1 @llvm.vp.reduce.umax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i1 @llvm.vp.reduce.umax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i1 @llvm.vp.reduce.umax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i1 @llvm.vp.reduce.umax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i1 @llvm.vp.reduce.umax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i1 @llvm.vp.reduce.umax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i1 @llvm.vp.reduce.umax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i1 @llvm.vp.reduce.umax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -48,6 +75,14 @@ define i32 @reduce_umax_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umax_i8'
@@ -59,6 +94,14 @@ define i32 @reduce_umax_i8(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> undef)
@@ -69,6 +112,15 @@ define i32 @reduce_umax_i8(i32 %arg) {
   %V32  = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
   %V64  = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
   %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
+
+  %V1_vp   = call i8 @llvm.vp.reduce.umax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i8 @llvm.vp.reduce.umax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i8 @llvm.vp.reduce.umax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i8 @llvm.vp.reduce.umax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i8 @llvm.vp.reduce.umax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i8 @llvm.vp.reduce.umax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i8 @llvm.vp.reduce.umax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i8 @llvm.vp.reduce.umax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -81,7 +133,14 @@ define i32 @reduce_umax_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umax_i16'
@@ -92,7 +151,14 @@ define i32 @reduce_umax_i16(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.umax.v1i16(<1 x i16> undef)
@@ -102,7 +168,15 @@ define i32 @reduce_umax_i16(i32 %arg) {
   %V16  = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
   %V32  = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
   %V64  = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
-  %V128 = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> undef)
+
+  %V1_vp   = call i16 @llvm.vp.reduce.umax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i16 @llvm.vp.reduce.umax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i16 @llvm.vp.reduce.umax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i16 @llvm.vp.reduce.umax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i16 @llvm.vp.reduce.umax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i16 @llvm.vp.reduce.umax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i16 @llvm.vp.reduce.umax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i16 @llvm.vp.reduce.umax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -116,6 +190,14 @@ define i32 @reduce_umax_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.umax.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.umax.v128i32(<128 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umax_i32'
@@ -127,6 +209,14 @@ define i32 @reduce_umax_i32(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.umax.v64i32(<64 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.umax.v128i32(<128 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.umax.v1i32(<1 x i32> undef)
@@ -137,32 +227,19 @@ define i32 @reduce_umax_i32(i32 %arg) {
   %V32  = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
   %V64  = call i32 @llvm.vector.reduce.umax.v64i32(<64 x i32> undef)
   %V128 = call i32 @llvm.vector.reduce.umax.v128i32(<128 x i32> undef)
+
+  %V1_vp   = call i32 @llvm.vp.reduce.umax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i32 @llvm.vp.reduce.umax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i32 @llvm.vp.reduce.umax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i32 @llvm.vp.reduce.umax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i32 @llvm.vp.reduce.umax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i32 @llvm.vp.reduce.umax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i32 @llvm.vp.reduce.umax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i32 @llvm.vp.reduce.umax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
 define i32 @reduce_umax_i64(i32 %arg) {
-; CHECK-LABEL: 'reduce_umax_i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.umax.v32i64(<32 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.umax.v64i64(<64 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.umax.v128i64(<128 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SIZE-LABEL: 'reduce_umax_i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.umax.v32i64(<32 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.umax.v64i64(<64 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.umax.v128i64(<128 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
   %V1   = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
   %V2   = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
   %V4   = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
@@ -171,6 +248,15 @@ define i32 @reduce_umax_i64(i32 %arg) {
   %V32  = call i64 @llvm.vector.reduce.umax.v32i64(<32 x i64> undef)
   %V64  = call i64 @llvm.vector.reduce.umax.v64i64(<64 x i64> undef)
   %V128 = call i64 @llvm.vector.reduce.umax.v128i64(<128 x i64> undef)
+
+  %V1_vp   = call i64 @llvm.vp.reduce.umax.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i64 @llvm.vp.reduce.umax.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i64 @llvm.vp.reduce.umax.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i64 @llvm.vp.reduce.umax.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i64 @llvm.vp.reduce.umax.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i64 @llvm.vp.reduce.umax.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i64 @llvm.vp.reduce.umax.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i64 @llvm.vp.reduce.umax.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -184,6 +270,14 @@ define i32 @reduce_smin_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.smax.v32i1(<32 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.smax.v64i1(<64 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.smax.v128i1(<128 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smin_i1'
@@ -195,6 +289,14 @@ define i32 @reduce_smin_i1(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.smax.v32i1(<32 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.smax.v64i1(<64 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.smax.v128i1(<128 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> undef)
@@ -205,6 +307,15 @@ define i32 @reduce_smin_i1(i32 %arg) {
   %V32  = call i1 @llvm.vector.reduce.smax.v32i1(<32 x i1> undef)
   %V64  = call i1 @llvm.vector.reduce.smax.v64i1(<64 x i1> undef)
   %V128 = call i1 @llvm.vector.reduce.smax.v128i1(<128 x i1> undef)
+
+  %V1_vp   = call i1 @llvm.vp.reduce.smax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i1 @llvm.vp.reduce.smax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i1 @llvm.vp.reduce.smax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i1 @llvm.vp.reduce.smax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i1 @llvm.vp.reduce.smax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i1 @llvm.vp.reduce.smax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i1 @llvm.vp.reduce.smax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i1 @llvm.vp.reduce.smax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -218,6 +329,14 @@ define i32 @reduce_smax_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smax_i8'
@@ -229,6 +348,14 @@ define i32 @reduce_smax_i8(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.smax.v1i8(<1 x i8> undef)
@@ -239,6 +366,15 @@ define i32 @reduce_smax_i8(i32 %arg) {
   %V32  = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
   %V64  = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
   %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
+
+  %V1_vp   = call i8 @llvm.vp.reduce.smax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i8 @llvm.vp.reduce.smax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i8 @llvm.vp.reduce.smax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i8 @llvm.vp.reduce.smax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i8 @llvm.vp.reduce.smax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i8 @llvm.vp.reduce.smax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i8 @llvm.vp.reduce.smax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i8 @llvm.vp.reduce.smax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -252,6 +388,14 @@ define i32 @reduce_smax_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smax_i16'
@@ -263,6 +407,14 @@ define i32 @reduce_smax_i16(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.smax.v1i16(<1 x i16> undef)
@@ -273,6 +425,15 @@ define i32 @reduce_smax_i16(i32 %arg) {
   %V32  = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
   %V64  = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
   %V128 = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> undef)
+
+  %V1_vp   = call i16 @llvm.vp.reduce.smax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i16 @llvm.vp.reduce.smax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i16 @llvm.vp.reduce.smax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i16 @llvm.vp.reduce.smax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i16 @llvm.vp.reduce.smax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i16 @llvm.vp.reduce.smax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i16 @llvm.vp.reduce.smax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i16 @llvm.vp.reduce.smax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -286,6 +447,14 @@ define i32 @reduce_smax_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smax_i32'
@@ -297,6 +466,14 @@ define i32 @reduce_smax_i32(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.smax.v1i32(<1 x i32> undef)
@@ -307,32 +484,19 @@ define i32 @reduce_smax_i32(i32 %arg) {
   %V32  = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
   %V64  = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef)
   %V128 = call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef)
+
+  %V1_vp   = call i32 @llvm.vp.reduce.smax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i32 @llvm.vp.reduce.smax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i32 @llvm.vp.reduce.smax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i32 @llvm.vp.reduce.smax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i32 @llvm.vp.reduce.smax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i32 @llvm.vp.reduce.smax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i32 @llvm.vp.reduce.smax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i32 @llvm.vp.reduce.smax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
 define i32 @reduce_smax_i64(i32 %arg) {
-; CHECK-LABEL: 'reduce_smax_i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.smax.v128i64(<128 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SIZE-LABEL: 'reduce_smax_i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.smax.v128i64(<128 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
   %V1   = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
   %V2   = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
   %V4   = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
@@ -341,6 +505,15 @@ define i32 @reduce_smax_i64(i32 %arg) {
   %V32  = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> undef)
   %V64  = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef)
   %V128 = call i64 @llvm.vector.reduce.smax.v128i64(<128 x i64> undef)
+
+  %V1_vp   = call i64 @llvm.vp.reduce.smax.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i64 @llvm.vp.reduce.smax.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i64 @llvm.vp.reduce.smax.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i64 @llvm.vp.reduce.smax.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i64 @llvm.vp.reduce.smax.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i64 @llvm.vp.reduce.smax.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i64 @llvm.vp.reduce.smax.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i64 @llvm.vp.reduce.smax.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
index 89bff381384156..858c053f0c3ef3 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
@@ -1,8 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
 
 define i32 @reduce_umin_i1(i32 %arg) {
 ; CHECK-LABEL: 'reduce_umin_i1'
@@ -14,6 +16,14 @@ define i32 @reduce_umin_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.umin.v32i1(<32 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.umin.v64i1(<64 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.umin.v128i1(<128 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umin_i1'
@@ -25,6 +35,14 @@ define i32 @reduce_umin_i1(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.umin.v32i1(<32 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.umin.v64i1(<64 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.umin.v128i1(<128 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.umin.v1i1(<1 x i1> undef)
@@ -35,6 +53,15 @@ define i32 @reduce_umin_i1(i32 %arg) {
   %V32  = call i1 @llvm.vector.reduce.umin.v32i1(<32 x i1> undef)
   %V64  = call i1 @llvm.vector.reduce.umin.v64i1(<64 x i1> undef)
   %V128 = call i1 @llvm.vector.reduce.umin.v128i1(<128 x i1> undef)
+
+  %V1_vp   = call i1 @llvm.vp.reduce.umin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i1 @llvm.vp.reduce.umin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i1 @llvm.vp.reduce.umin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i1 @llvm.vp.reduce.umin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i1 @llvm.vp.reduce.umin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i1 @llvm.vp.reduce.umin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i1 @llvm.vp.reduce.umin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i1 @llvm.vp.reduce.umin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -48,6 +75,14 @@ define i32 @reduce_umin_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umin_i8'
@@ -59,6 +94,14 @@ define i32 @reduce_umin_i8(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.umin.v1i8(<1 x i8> undef)
@@ -69,6 +112,15 @@ define i32 @reduce_umin_i8(i32 %arg) {
   %V32  = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
   %V64  = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
   %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
+
+  %V1_vp   = call i8 @llvm.vp.reduce.umin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i8 @llvm.vp.reduce.umin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i8 @llvm.vp.reduce.umin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i8 @llvm.vp.reduce.umin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i8 @llvm.vp.reduce.umin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i8 @llvm.vp.reduce.umin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i8 @llvm.vp.reduce.umin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i8 @llvm.vp.reduce.umin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -82,6 +134,14 @@ define i32 @reduce_umin_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.umin.v128i16(<128 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umin_i16'
@@ -93,6 +153,14 @@ define i32 @reduce_umin_i16(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.umin.v128i16(<128 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.umin.v1i16(<1 x i16> undef)
@@ -103,6 +171,15 @@ define i32 @reduce_umin_i16(i32 %arg) {
   %V32  = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
   %V64  = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
   %V128 = call i16 @llvm.vector.reduce.umin.v128i16(<128 x i16> undef)
+
+  %V1_vp   = call i16 @llvm.vp.reduce.umin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i16 @llvm.vp.reduce.umin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i16 @llvm.vp.reduce.umin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i16 @llvm.vp.reduce.umin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i16 @llvm.vp.reduce.umin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i16 @llvm.vp.reduce.umin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i16 @llvm.vp.reduce.umin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i16 @llvm.vp.reduce.umin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -116,6 +193,14 @@ define i32 @reduce_umin_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.umin.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.umin.v128i32(<128 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umin_i32'
@@ -127,6 +212,14 @@ define i32 @reduce_umin_i32(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.umin.v64i32(<64 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.umin.v128i32(<128 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.umin.v1i32(<1 x i32> undef)
@@ -137,32 +230,19 @@ define i32 @reduce_umin_i32(i32 %arg) {
   %V32  = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
   %V64  = call i32 @llvm.vector.reduce.umin.v64i32(<64 x i32> undef)
   %V128 = call i32 @llvm.vector.reduce.umin.v128i32(<128 x i32> undef)
+
+  %V1_vp   = call i32 @llvm.vp.reduce.umin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i32 @llvm.vp.reduce.umin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i32 @llvm.vp.reduce.umin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i32 @llvm.vp.reduce.umin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i32 @llvm.vp.reduce.umin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i32 @llvm.vp.reduce.umin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i32 @llvm.vp.reduce.umin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i32 @llvm.vp.reduce.umin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
 define i32 @reduce_umin_i64(i32 %arg) {
-; CHECK-LABEL: 'reduce_umin_i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.umin.v32i64(<32 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.umin.v64i64(<64 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.umin.v128i64(<128 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SIZE-LABEL: 'reduce_umin_i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.umin.v32i64(<32 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.umin.v64i64(<64 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.umin.v128i64(<128 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
   %V1   = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
   %V2   = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
   %V4   = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
@@ -171,6 +251,15 @@ define i32 @reduce_umin_i64(i32 %arg) {
   %V32  = call i64 @llvm.vector.reduce.umin.v32i64(<32 x i64> undef)
   %V64  = call i64 @llvm.vector.reduce.umin.v64i64(<64 x i64> undef)
   %V128 = call i64 @llvm.vector.reduce.umin.v128i64(<128 x i64> undef)
+
+  %V1_vp   = call i64 @llvm.vp.reduce.umin.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i64 @llvm.vp.reduce.umin.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i64 @llvm.vp.reduce.umin.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i64 @llvm.vp.reduce.umin.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i64 @llvm.vp.reduce.umin.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i64 @llvm.vp.reduce.umin.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i64 @llvm.vp.reduce.umin.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i64 @llvm.vp.reduce.umin.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -184,6 +273,14 @@ define i32 @reduce_smin_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.smin.v32i1(<32 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.smin.v64i1(<64 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.smin.v128i1(<128 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smin_i1'
@@ -195,6 +292,14 @@ define i32 @reduce_smin_i1(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.smin.v32i1(<32 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.smin.v64i1(<64 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.smin.v128i1(<128 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.smin.v1i1(<1 x i1> undef)
@@ -205,6 +310,15 @@ define i32 @reduce_smin_i1(i32 %arg) {
   %V32  = call i1 @llvm.vector.reduce.smin.v32i1(<32 x i1> undef)
   %V64  = call i1 @llvm.vector.reduce.smin.v64i1(<64 x i1> undef)
   %V128 = call i1 @llvm.vector.reduce.smin.v128i1(<128 x i1> undef)
+
+  %V1_vp   = call i1 @llvm.vp.reduce.smin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i1 @llvm.vp.reduce.smin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i1 @llvm.vp.reduce.smin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i1 @llvm.vp.reduce.smin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i1 @llvm.vp.reduce.smin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i1 @llvm.vp.reduce.smin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i1 @llvm.vp.reduce.smin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i1 @llvm.vp.reduce.smin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -218,6 +332,14 @@ define i32 @reduce_smin_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smin_i8'
@@ -229,6 +351,14 @@ define i32 @reduce_smin_i8(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.smin.v1i8(<1 x i8> undef)
@@ -239,6 +369,15 @@ define i32 @reduce_smin_i8(i32 %arg) {
   %V32  = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
   %V64  = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
   %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
+
+  %V1_vp   = call i8 @llvm.vp.reduce.smin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i8 @llvm.vp.reduce.smin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i8 @llvm.vp.reduce.smin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i8 @llvm.vp.reduce.smin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i8 @llvm.vp.reduce.smin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i8 @llvm.vp.reduce.smin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i8 @llvm.vp.reduce.smin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i8 @llvm.vp.reduce.smin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -252,6 +391,14 @@ define i32 @reduce_smin_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.smin.v128i16(<128 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smin_i16'
@@ -263,6 +410,14 @@ define i32 @reduce_smin_i16(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.smin.v128i16(<128 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.smin.v1i16(<1 x i16> undef)
@@ -273,6 +428,15 @@ define i32 @reduce_smin_i16(i32 %arg) {
   %V32  = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
   %V64  = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
   %V128 = call i16 @llvm.vector.reduce.smin.v128i16(<128 x i16> undef)
+
+  %V1_vp   = call i16 @llvm.vp.reduce.smin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i16 @llvm.vp.reduce.smin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i16 @llvm.vp.reduce.smin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i16 @llvm.vp.reduce.smin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i16 @llvm.vp.reduce.smin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i16 @llvm.vp.reduce.smin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i16 @llvm.vp.reduce.smin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i16 @llvm.vp.reduce.smin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -286,6 +450,14 @@ define i32 @reduce_smin_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.smin.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.smin.v128i32(<128 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smin_i32'
@@ -297,6 +469,14 @@ define i32 @reduce_smin_i32(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.smin.v64i32(<64 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.smin.v128i32(<128 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.smin.v1i32(<1 x i32> undef)
@@ -307,32 +487,19 @@ define i32 @reduce_smin_i32(i32 %arg) {
   %V32  = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
   %V64  = call i32 @llvm.vector.reduce.smin.v64i32(<64 x i32> undef)
   %V128 = call i32 @llvm.vector.reduce.smin.v128i32(<128 x i32> undef)
+
+  %V1_vp   = call i32 @llvm.vp.reduce.smin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i32 @llvm.vp.reduce.smin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i32 @llvm.vp.reduce.smin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i32 @llvm.vp.reduce.smin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i32 @llvm.vp.reduce.smin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i32 @llvm.vp.reduce.smin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i32 @llvm.vp.reduce.smin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i32 @llvm.vp.reduce.smin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
 define i32 @reduce_smin_i64(i32 %arg) {
-; CHECK-LABEL: 'reduce_smin_i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.smin.v32i64(<32 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.smin.v64i64(<64 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.smin.v128i64(<128 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SIZE-LABEL: 'reduce_smin_i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.smin.v32i64(<32 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.smin.v64i64(<64 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.smin.v128i64(<128 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
   %V1   = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
   %V2   = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
   %V4   = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
@@ -341,6 +508,15 @@ define i32 @reduce_smin_i64(i32 %arg) {
   %V32  = call i64 @llvm.vector.reduce.smin.v32i64(<32 x i64> undef)
   %V64  = call i64 @llvm.vector.reduce.smin.v64i64(<64 x i64> undef)
   %V128 = call i64 @llvm.vector.reduce.smin.v128i64(<128 x i64> undef)
+
+  %V1_vp   = call i64 @llvm.vp.reduce.smin.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i64 @llvm.vp.reduce.smin.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i64 @llvm.vp.reduce.smin.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i64 @llvm.vp.reduce.smin.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i64 @llvm.vp.reduce.smin.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i64 @llvm.vp.reduce.smin.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i64 @llvm.vp.reduce.smin.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i64 @llvm.vp.reduce.smin.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
index d75a95f3fadd97..2db77240fc3fcf 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
@@ -1,8 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
 
 define i32 @reduce_i1(i32 %arg) {
 ; CHECK-LABEL: 'reduce_i1'
@@ -17,6 +19,17 @@ define i32 @reduce_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V256 = call i1 @llvm.vector.reduce.or.v256i1(<256 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V512 = call i1 @llvm.vector.reduce.or.v512i1(<512 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V1024 = call i1 @llvm.vector.reduce.or.v1024i1(<1024 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.or.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.or.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.or.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.or.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.or.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.or.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.or.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.or.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14076 for instruction: %V256_vp = call i1 @llvm.vp.reduce.or.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 46584 for instruction: %V512_vp = call i1 @llvm.vp.reduce.or.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 166896 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.or.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i1'
@@ -31,6 +44,17 @@ define i32 @reduce_i1(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V256 = call i1 @llvm.vector.reduce.or.v256i1(<256 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V512 = call i1 @llvm.vector.reduce.or.v512i1(<512 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V1024 = call i1 @llvm.vector.reduce.or.v1024i1(<1024 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.or.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.or.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.or.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.or.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.or.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.or.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.or.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.or.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3324 for instruction: %V256_vp = call i1 @llvm.vp.reduce.or.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10744 for instruction: %V512_vp = call i1 @llvm.vp.reduce.or.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 37872 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.or.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef)
@@ -44,6 +68,18 @@ define i32 @reduce_i1(i32 %arg) {
   %V256 = call i1 @llvm.vector.reduce.or.v256i1(<256 x i1> undef)
   %V512 = call i1 @llvm.vector.reduce.or.v512i1(<512 x i1> undef)
   %V1024 = call i1 @llvm.vector.reduce.or.v1024i1(<1024 x i1> undef)
+
+  %V1_vp   = call i1 @llvm.vp.reduce.or.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i1 @llvm.vp.reduce.or.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i1 @llvm.vp.reduce.or.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i1 @llvm.vp.reduce.or.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i1 @llvm.vp.reduce.or.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i1 @llvm.vp.reduce.or.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i1 @llvm.vp.reduce.or.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i1 @llvm.vp.reduce.or.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+  %V256_vp = call i1 @llvm.vp.reduce.or.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
+  %V512_vp = call i1 @llvm.vp.reduce.or.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
+  %V1024_vp = call i1 @llvm.vp.reduce.or.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -57,6 +93,14 @@ define i32 @reduce_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.or.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.or.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.or.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.or.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.or.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.or.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.or.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.or.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i8'
@@ -68,6 +112,14 @@ define i32 @reduce_i8(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.or.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.or.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.or.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.or.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.or.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.or.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.or.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.or.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> undef)
@@ -78,6 +130,15 @@ define i32 @reduce_i8(i32 %arg) {
   %V32  = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
   %V64  = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef)
   %V128 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> undef)
+
+  %V1_vp   = call i8 @llvm.vp.reduce.or.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i8 @llvm.vp.reduce.or.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i8 @llvm.vp.reduce.or.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i8 @llvm.vp.reduce.or.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i8 @llvm.vp.reduce.or.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i8 @llvm.vp.reduce.or.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i8 @llvm.vp.reduce.or.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i8 @llvm.vp.reduce.or.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -91,6 +152,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.or.v128i16(<128 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.or.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.or.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.or.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.or.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.or.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.or.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.or.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.or.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i16'
@@ -102,6 +171,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.or.v128i16(<128 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.or.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.or.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.or.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.or.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.or.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.or.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.or.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.or.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.or.v1i16(<1 x i16> undef)
@@ -112,6 +189,15 @@ define i32 @reduce_i16(i32 %arg) {
   %V32  = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> undef)
   %V64  = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> undef)
   %V128 = call i16 @llvm.vector.reduce.or.v128i16(<128 x i16> undef)
+
+  %V1_vp   = call i16 @llvm.vp.reduce.or.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i16 @llvm.vp.reduce.or.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i16 @llvm.vp.reduce.or.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i16 @llvm.vp.reduce.or.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i16 @llvm.vp.reduce.or.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i16 @llvm.vp.reduce.or.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i16 @llvm.vp.reduce.or.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i16 @llvm.vp.reduce.or.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -125,6 +211,14 @@ define i32 @reduce_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.or.v128i32(<128 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.or.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.or.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.or.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.or.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.or.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.or.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.or.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.or.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i32'
@@ -136,6 +230,14 @@ define i32 @reduce_i32(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.or.v128i32(<128 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.or.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.or.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.or.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.or.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.or.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.or.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.or.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.or.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.or.v1i32(<1 x i32> undef)
@@ -146,32 +248,19 @@ define i32 @reduce_i32(i32 %arg) {
   %V32  = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> undef)
   %V64  = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> undef)
   %V128 = call i32 @llvm.vector.reduce.or.v128i32(<128 x i32> undef)
+
+  %V1_vp   = call i32 @llvm.vp.reduce.or.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i32 @llvm.vp.reduce.or.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i32 @llvm.vp.reduce.or.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i32 @llvm.vp.reduce.or.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i32 @llvm.vp.reduce.or.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i32 @llvm.vp.reduce.or.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i32 @llvm.vp.reduce.or.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i32 @llvm.vp.reduce.or.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
 define i32 @reduce_i64(i32 %arg) {
-; CHECK-LABEL: 'reduce_i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.or.v32i64(<32 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.or.v64i64(<64 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.or.v128i64(<128 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SIZE-LABEL: 'reduce_i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.or.v32i64(<32 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.or.v64i64(<64 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.or.v128i64(<128 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
   %V1   = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
   %V2   = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
   %V4   = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
@@ -180,6 +269,15 @@ define i32 @reduce_i64(i32 %arg) {
   %V32  = call i64 @llvm.vector.reduce.or.v32i64(<32 x i64> undef)
   %V64  = call i64 @llvm.vector.reduce.or.v64i64(<64 x i64> undef)
   %V128 = call i64 @llvm.vector.reduce.or.v128i64(<128 x i64> undef)
+
+  %V1_vp   = call i64 @llvm.vp.reduce.or.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i64 @llvm.vp.reduce.or.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i64 @llvm.vp.reduce.or.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i64 @llvm.vp.reduce.or.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i64 @llvm.vp.reduce.or.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i64 @llvm.vp.reduce.or.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i64 @llvm.vp.reduce.or.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i64 @llvm.vp.reduce.or.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
index 4f6e0ba074ed81..7f1ff31f594348 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
@@ -1,34 +1,42 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
-; RUN: opt < %s -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
 
 declare half @llvm.vector.reduce.fadd.nxv1f16(half, <vscale x 1 x half>)
 
 define half @vreduce_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv1f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv1f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call reassoc half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
+  %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
   ret half %red
 }
 
 define half @vreduce_ord_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv1f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv1f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
+  %red_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
   ret half %red
 }
 
@@ -37,26 +45,32 @@ declare half @llvm.vector.reduce.fadd.nxv2f16(half, <vscale x 2 x half>)
 define half @vreduce_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv2f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv2f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call reassoc half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
+  %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
   ret half %red
 }
 
 define half @vreduce_ord_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv2f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv2f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
+  %red_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
   ret half %red
 }
 
@@ -65,26 +79,32 @@ declare half @llvm.vector.reduce.fadd.nxv4f16(half, <vscale x 4 x half>)
 define half @vreduce_fadd_nxv4f16(<vscale x 4 x half> %v, half %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv4f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv4f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call reassoc half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
+  %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
   ret half %red
 }
 
 define half @vreduce_ord_fadd_nxv4f16(<vscale x 4 x half> %v, half %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv4f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv4f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
+  %red_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
   ret half %red
 }
 
@@ -93,26 +113,32 @@ declare float @llvm.vector.reduce.fadd.nxv1f32(float, <vscale x 1 x float>)
 define float @vreduce_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv1f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
+  %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
   ret float %red
 }
 
 define float @vreduce_ord_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv1f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
+  %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
   ret float %red
 }
 
@@ -120,15 +146,18 @@ define float @vreduce_fwadd_nxv1f32(<vscale x 1 x half> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_fwadd_nxv1f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
   %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
+  %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
   ret float %red
 }
 
@@ -136,15 +165,18 @@ define float @vreduce_ord_fwadd_nxv1f32(<vscale x 1 x half> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv1f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
   %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
+  %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
   ret float %red
 }
 
@@ -153,26 +185,32 @@ declare float @llvm.vector.reduce.fadd.nxv2f32(float, <vscale x 2 x float>)
 define float @vreduce_fadd_nxv2f32(<vscale x 2 x float> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv2f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv2f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
+  %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
   ret float %red
 }
 
 define float @vreduce_ord_fadd_nxv2f32(<vscale x 2 x float> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv2f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv2f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
+  %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
   ret float %red
 }
 
@@ -180,15 +218,18 @@ define float @vreduce_fwadd_nxv2f32(<vscale x 2 x half> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_fwadd_nxv2f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv2f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
   %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
+  %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
   ret float %red
 }
 
@@ -196,15 +237,18 @@ define float @vreduce_ord_fwadd_nxv2f32(<vscale x 2 x half> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv2f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv2f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
   %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
+  %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
   ret float %red
 }
 
@@ -213,26 +257,32 @@ declare float @llvm.vector.reduce.fadd.nxv4f32(float, <vscale x 4 x float>)
 define float @vreduce_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv4f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv4f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
+  %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
   ret float %red
 }
 
 define float @vreduce_ord_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv4f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv4f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
+  %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
   ret float %red
 }
 
@@ -240,15 +290,18 @@ define float @vreduce_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_fwadd_nxv4f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv4f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
   %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
+  %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
   ret float %red
 }
 
@@ -256,15 +309,18 @@ define float @vreduce_ord_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv4f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv4f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
   %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
+  %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
   ret float %red
 }
 
@@ -273,26 +329,32 @@ declare double @llvm.vector.reduce.fadd.nxv1f64(double, <vscale x 1 x double>)
 define double @vreduce_fadd_nxv1f64(<vscale x 1 x double> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv1f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv1f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
+  %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
   ret double %red
 }
 
 define double @vreduce_ord_fadd_nxv1f64(<vscale x 1 x double> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv1f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv1f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
+  %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
   ret double %red
 }
 
@@ -300,15 +362,18 @@ define double @vreduce_fwadd_nxv1f64(<vscale x 1 x float> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_fwadd_nxv1f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv1f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
   %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
+  %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
   ret double %red
 }
 
@@ -316,15 +381,18 @@ define double @vreduce_ord_fwadd_nxv1f64(<vscale x 1 x float> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv1f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv1f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
   %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
+  %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
   ret double %red
 }
 
@@ -333,26 +401,32 @@ declare double @llvm.vector.reduce.fadd.nxv2f64(double, <vscale x 2 x double>)
 define double @vreduce_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv2f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv2f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
+  %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
   ret double %red
 }
 
 define double @vreduce_ord_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv2f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv2f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
+  %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
   ret double %red
 }
 
@@ -360,15 +434,18 @@ define double @vreduce_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_fwadd_nxv2f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv2f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
   %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
+  %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
   ret double %red
 }
 
@@ -376,15 +453,18 @@ define double @vreduce_ord_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv2f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv2f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
   %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
+  %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
   ret double %red
 }
 
@@ -393,26 +473,32 @@ declare double @llvm.vector.reduce.fadd.nxv4f64(double, <vscale x 4 x double>)
 define double @vreduce_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv4f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv4f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
+  %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
   ret double %red
 }
 
 define double @vreduce_ord_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv4f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv4f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
+  %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
   ret double %red
 }
 
@@ -420,15 +506,18 @@ define double @vreduce_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_fwadd_nxv4f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv4f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
   %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
+  %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
   ret double %red
 }
 
@@ -436,15 +525,18 @@ define double @vreduce_ord_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv4f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv4f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
   %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
+  %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
   ret double %red
 }
 
@@ -453,39 +545,48 @@ declare half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half>)
 define half @vreduce_fmin_nxv1f16(<vscale x 1 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
+  %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
   ret half %red
 }
 
 define half @vreduce_fmin_nxv1f16_nonans(<vscale x 1 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f16_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f16_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
+  %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
   ret half %red
 }
 
 define half @vreduce_fmin_nxv1f16_nonans_noinfs(<vscale x 1 x half> %v) #1 {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f16_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f16_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
+  %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
   ret half %red
 }
 
@@ -494,13 +595,16 @@ declare half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half>)
 define half @vreduce_fmin_nxv2f16(<vscale x 2 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv2f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv2f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %v)
+  %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
   ret half %red
 }
 
@@ -509,13 +613,16 @@ declare half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half>)
 define half @vreduce_fmin_nxv4f16(<vscale x 4 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv4f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv4f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %v)
+  %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
   ret half %red
 }
 
@@ -524,13 +631,16 @@ declare half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half>)
 define half @vreduce_fmin_nxv64f16(<vscale x 64 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv64f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv64f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
+  %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
   ret half %red
 }
 
@@ -539,39 +649,48 @@ declare float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float>)
 define float @vreduce_fmin_nxv1f32(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
+  %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
   ret float %red
 }
 
 define float @vreduce_fmin_nxv1f32_nonans(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f32_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f32_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
+  %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
   ret float %red
 }
 
 define float @vreduce_fmin_nxv1f32_nonans_noinfs(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f32_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f32_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
+  %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
   ret float %red
 }
 
@@ -580,13 +699,16 @@ declare float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float>)
 define float @vreduce_fmin_nxv2f32(<vscale x 2 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv2f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv2f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %v)
+  %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
   ret float %red
 }
 
@@ -595,13 +717,16 @@ declare float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float>)
 define float @vreduce_fmin_nxv4f32(<vscale x 4 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv4f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv4f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
+  %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
   ret float %red
 }
 
@@ -610,13 +735,16 @@ declare float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float>)
 define float @vreduce_fmin_nxv32f32(<vscale x 32 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv32f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv32f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
+  %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
   ret float %red
 }
 
@@ -625,39 +753,48 @@ declare double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double>)
 define double @vreduce_fmin_nxv1f64(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
+  %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
   ret double %red
 }
 
 define double @vreduce_fmin_nxv1f64_nonans(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f64_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f64_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
+  %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
   ret double %red
 }
 
 define double @vreduce_fmin_nxv1f64_nonans_noinfs(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f64_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f64_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
+  %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
   ret double %red
 }
 
@@ -666,13 +803,16 @@ declare double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double>)
 define double @vreduce_fmin_nxv2f64(<vscale x 2 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv2f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv2f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %v)
+  %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
   ret double %red
 }
 
@@ -681,13 +821,16 @@ declare double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double>)
 define double @vreduce_fmin_nxv4f64(<vscale x 4 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv4f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv4f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
+  %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
   ret double %red
 }
 
@@ -696,13 +839,16 @@ declare double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double>)
 define double @vreduce_fmin_nxv16f64(<vscale x 16 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv16f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv16f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
+  %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
   ret double %red
 }
 
@@ -711,39 +857,48 @@ declare half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half>)
 define half @vreduce_fmax_nxv1f16(<vscale x 1 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
+  %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
   ret half %red
 }
 
 define half @vreduce_fmax_nxv1f16_nonans(<vscale x 1 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f16_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f16_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
+  %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
   ret half %red
 }
 
 define half @vreduce_fmax_nxv1f16_nonans_noinfs(<vscale x 1 x half> %v) #1 {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f16_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f16_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
+  %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
   ret half %red
 }
 
@@ -752,13 +907,16 @@ declare half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half>)
 define half @vreduce_fmax_nxv2f16(<vscale x 2 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv2f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv2f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %v)
+  %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
   ret half %red
 }
 
@@ -767,13 +925,16 @@ declare half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half>)
 define half @vreduce_fmax_nxv4f16(<vscale x 4 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv4f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv4f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %v)
+  %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
   ret half %red
 }
 
@@ -782,13 +943,16 @@ declare half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half>)
 define half @vreduce_fmax_nxv64f16(<vscale x 64 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv64f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv64f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
+  %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
   ret half %red
 }
 
@@ -797,39 +961,48 @@ declare float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float>)
 define float @vreduce_fmax_nxv1f32(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
+  %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
   ret float %red
 }
 
 define float @vreduce_fmax_nxv1f32_nonans(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f32_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f32_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
+  %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
   ret float %red
 }
 
 define float @vreduce_fmax_nxv1f32_nonans_noinfs(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f32_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f32_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
+  %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
   ret float %red
 }
 
@@ -838,13 +1011,16 @@ declare float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float>)
 define float @vreduce_fmax_nxv2f32(<vscale x 2 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv2f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv2f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %v)
+  %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
   ret float %red
 }
 
@@ -853,13 +1029,16 @@ declare float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float>)
 define float @vreduce_fmax_nxv4f32(<vscale x 4 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv4f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv4f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
+  %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
   ret float %red
 }
 
@@ -868,13 +1047,16 @@ declare float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float>)
 define float @vreduce_fmax_nxv32f32(<vscale x 32 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv32f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv32f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
+  %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
   ret float %red
 }
 
@@ -883,39 +1065,48 @@ declare double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double>)
 define double @vreduce_fmax_nxv1f64(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
+  %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
   ret double %red
 }
 
 define double @vreduce_fmax_nxv1f64_nonans(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f64_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f64_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
+  %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
   ret double %red
 }
 
 define double @vreduce_fmax_nxv1f64_nonans_noinfs(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f64_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f64_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
+  %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
   ret double %red
 }
 
@@ -924,13 +1115,16 @@ declare double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double>)
 define double @vreduce_fmax_nxv2f64(<vscale x 2 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv2f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv2f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %v)
+  %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
   ret double %red
 }
 
@@ -939,13 +1133,16 @@ declare double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double>)
 define double @vreduce_fmax_nxv4f64(<vscale x 4 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv4f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv4f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
+  %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
   ret double %red
 }
 
@@ -954,25 +1151,31 @@ declare double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double>)
 define double @vreduce_fmax_nxv16f64(<vscale x 16 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv16f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv16f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)
+  %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
   ret double %red
 }
 
 define float @vreduce_nsz_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_nsz_fadd_nxv1f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc nsz float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_nsz_fadd_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc nsz float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
+  %red_vp = call reassoc nsz float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
   ret float %red
 }
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll
index 2807f7526760f8..b565cc9ac3af4a 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll
@@ -1,21 +1,26 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
 
 declare i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8>)
 
 define signext i8 @vreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.add.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -24,13 +29,16 @@ declare i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8>)
 define signext i8 @vreduce_umax_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.umax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -39,13 +47,16 @@ declare i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8>)
 define signext i8 @vreduce_smax_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.smax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -54,13 +65,16 @@ declare i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8>)
 define signext i8 @vreduce_umin_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.umin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -69,13 +83,16 @@ declare i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8>)
 define signext i8 @vreduce_smin_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.smin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -84,13 +101,16 @@ declare i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8>)
 define signext i8 @vreduce_and_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.and.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -99,13 +119,16 @@ declare i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8>)
 define signext i8 @vreduce_or_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.or.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -114,13 +137,16 @@ declare i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8>)
 define signext i8 @vreduce_xor_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.xor.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -129,13 +155,16 @@ declare i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8>)
 define signext i8 @vreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.add.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -144,13 +173,16 @@ declare i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8>)
 define signext i8 @vreduce_umax_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.umax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -159,13 +191,16 @@ declare i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8>)
 define signext i8 @vreduce_smax_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.smax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -174,13 +209,16 @@ declare i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8>)
 define signext i8 @vreduce_umin_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.umin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -189,13 +227,16 @@ declare i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8>)
 define signext i8 @vreduce_smin_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.smin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -204,13 +245,16 @@ declare i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8>)
 define signext i8 @vreduce_and_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.and.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -219,13 +263,16 @@ declare i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8>)
 define signext i8 @vreduce_or_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.or.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -234,13 +281,16 @@ declare i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8>)
 define signext i8 @vreduce_xor_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.xor.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -249,13 +299,16 @@ declare i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8>)
 define signext i8 @vreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -264,13 +317,16 @@ declare i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8>)
 define signext i8 @vreduce_umax_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.umax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -279,13 +335,16 @@ declare i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8>)
 define signext i8 @vreduce_smax_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.smax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -294,13 +353,16 @@ declare i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8>)
 define signext i8 @vreduce_umin_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.umin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -309,13 +371,16 @@ declare i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8>)
 define signext i8 @vreduce_smin_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.smin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -324,13 +389,16 @@ declare i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8>)
 define signext i8 @vreduce_and_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.and.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -339,13 +407,16 @@ declare i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8>)
 define signext i8 @vreduce_or_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.or.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -354,13 +425,16 @@ declare i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8>)
 define signext i8 @vreduce_xor_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8> %v)
+  %red_vp = call i8 @llvm.vp.reduce.xor.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i8 %red
 }
 
@@ -369,13 +443,16 @@ declare i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16>)
 define signext i16 @vreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -383,15 +460,18 @@ define signext i16 @vwreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
   %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
+  %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -399,15 +479,18 @@ define signext i16 @vwreduce_uadd_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
   %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
+  %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -416,13 +499,16 @@ declare i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16>)
 define signext i16 @vreduce_umax_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.umax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -431,13 +517,16 @@ declare i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16>)
 define signext i16 @vreduce_smax_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.smax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -446,13 +535,16 @@ declare i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16>)
 define signext i16 @vreduce_umin_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.umin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -461,13 +553,16 @@ declare i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16>)
 define signext i16 @vreduce_smin_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.smin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -476,13 +571,16 @@ declare i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16>)
 define signext i16 @vreduce_and_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.and.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -491,13 +589,16 @@ declare i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16>)
 define signext i16 @vreduce_or_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.or.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -506,13 +607,16 @@ declare i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16>)
 define signext i16 @vreduce_xor_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.xor.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -521,13 +625,16 @@ declare i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16>)
 define signext i16 @vreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -535,15 +642,18 @@ define signext i16 @vwreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
   %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
+  %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -551,15 +661,18 @@ define signext i16 @vwreduce_uadd_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
   %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
+  %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -568,13 +681,16 @@ declare i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16>)
 define signext i16 @vreduce_umax_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.umax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -583,13 +699,16 @@ declare i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16>)
 define signext i16 @vreduce_smax_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.smax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -598,13 +717,16 @@ declare i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16>)
 define signext i16 @vreduce_umin_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.umin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -613,13 +735,16 @@ declare i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16>)
 define signext i16 @vreduce_smin_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.smin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -628,13 +753,16 @@ declare i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16>)
 define signext i16 @vreduce_and_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.and.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -643,13 +771,16 @@ declare i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16>)
 define signext i16 @vreduce_or_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.or.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -658,13 +789,16 @@ declare i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16>)
 define signext i16 @vreduce_xor_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.xor.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -673,13 +807,16 @@ declare i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16>)
 define signext i16 @vreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -687,15 +824,18 @@ define signext i16 @vwreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
   %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
+  %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -703,15 +843,18 @@ define signext i16 @vwreduce_uadd_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
   %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
+  %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -720,13 +863,16 @@ declare i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16>)
 define signext i16 @vreduce_umax_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.umax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -735,13 +881,16 @@ declare i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16>)
 define signext i16 @vreduce_smax_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.smax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -750,13 +899,16 @@ declare i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16>)
 define signext i16 @vreduce_umin_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.umin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -765,13 +917,16 @@ declare i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16>)
 define signext i16 @vreduce_smin_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.smin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -780,13 +935,16 @@ declare i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16>)
 define signext i16 @vreduce_and_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.and.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -795,13 +953,16 @@ declare i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16>)
 define signext i16 @vreduce_or_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.or.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -810,13 +971,16 @@ declare i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16>)
 define signext i16 @vreduce_xor_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16> %v)
+  %red_vp = call i16 @llvm.vp.reduce.xor.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i16 %red
 }
 
@@ -825,13 +989,16 @@ declare i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32>)
 define signext i32 @vreduce_add_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -839,15 +1006,18 @@ define signext i32 @vwreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i16> %v to <vscale x 1 x i32>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i16> %v to <vscale x 1 x i32>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = sext <vscale x 1 x i16> %v to <vscale x 1 x i32>
   %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
+  %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -855,15 +1025,18 @@ define signext i32 @vwreduce_uadd_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 1 x i16> %v to <vscale x 1 x i32>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 1 x i16> %v to <vscale x 1 x i32>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = zext <vscale x 1 x i16> %v to <vscale x 1 x i32>
   %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
+  %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -872,13 +1045,16 @@ declare i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32>)
 define signext i32 @vreduce_umax_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.umax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -887,13 +1063,16 @@ declare i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32>)
 define signext i32 @vreduce_smax_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.smax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -902,13 +1081,16 @@ declare i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32>)
 define signext i32 @vreduce_umin_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.umin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -917,13 +1099,16 @@ declare i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32>)
 define signext i32 @vreduce_smin_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.smin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -932,13 +1117,16 @@ declare i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32>)
 define signext i32 @vreduce_and_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.and.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -947,13 +1135,16 @@ declare i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32>)
 define signext i32 @vreduce_or_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.or.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -962,13 +1153,16 @@ declare i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32>)
 define signext i32 @vreduce_xor_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.xor.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -977,13 +1171,16 @@ declare i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32>)
 define signext i32 @vreduce_add_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef) ; VP form of the add reduction above; result is unused, the call exists only to be costed
   ret i32 %red
 }
 
@@ -991,15 +1188,18 @@ define signext i32 @vwreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i16> %v to <vscale x 2 x i32>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i16> %v to <vscale x 2 x i32>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = sext <vscale x 2 x i16> %v to <vscale x 2 x i32>
   %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
+  %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef) ; VP form of the add reduction over the sign-extended input; result is unused, the call exists only to be costed
   ret i32 %red
 }
 
@@ -1007,15 +1207,18 @@ define signext i32 @vwreduce_uadd_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 2 x i16> %v to <vscale x 2 x i32>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 2 x i16> %v to <vscale x 2 x i32>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = zext <vscale x 2 x i16> %v to <vscale x 2 x i32>
   %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
+  %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef) ; VP form of the add reduction over the zero-extended input; result is unused, the call exists only to be costed
   ret i32 %red
 }
 
@@ -1024,13 +1227,16 @@ declare i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32>)
 define signext i32 @vreduce_umax_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.umax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1039,13 +1245,16 @@ declare i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32>)
 define signext i32 @vreduce_smax_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.smax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1054,13 +1263,16 @@ declare i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32>)
 define signext i32 @vreduce_umin_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.umin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1069,13 +1281,16 @@ declare i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32>)
 define signext i32 @vreduce_smin_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.smin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1084,13 +1299,16 @@ declare i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32>)
 define signext i32 @vreduce_and_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1099,13 +1317,16 @@ declare i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32>)
 define signext i32 @vreduce_or_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.or.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1114,13 +1335,16 @@ declare i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32>)
 define signext i32 @vreduce_xor_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.xor.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1129,13 +1353,16 @@ declare i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1143,15 +1370,18 @@ define signext i32 @vwreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = sext <vscale x 4 x i16> %v to <vscale x 4 x i32>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i16> %v to <vscale x 4 x i32>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = sext <vscale x 4 x i16> %v to <vscale x 4 x i32>
   %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
+  %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1159,15 +1389,18 @@ define signext i32 @vwreduce_uadd_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = zext <vscale x 4 x i16> %v to <vscale x 4 x i32>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 4 x i16> %v to <vscale x 4 x i32>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = zext <vscale x 4 x i16> %v to <vscale x 4 x i32>
   %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
+  %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1176,13 +1409,16 @@ declare i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_umax_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.umax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1191,13 +1427,16 @@ declare i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_smax_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1206,13 +1445,16 @@ declare i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_umin_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.umin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1221,13 +1463,16 @@ declare i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_smin_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1236,13 +1481,16 @@ declare i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_and_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.and.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1251,13 +1499,16 @@ declare i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_or_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.or.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1266,13 +1517,16 @@ declare i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_xor_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
+  %red_vp = call i32 @llvm.vp.reduce.xor.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i32 %red
 }
 
@@ -1281,13 +1535,16 @@ declare i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64>)
 define i64 @vreduce_add_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv1i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv1i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1295,15 +1552,18 @@ define i64 @vwreduce_add_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i32> %v to <vscale x 1 x i64>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i32> %v to <vscale x 1 x i64>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = sext <vscale x 1 x i32> %v to <vscale x 1 x i64>
   %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
+  %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1311,15 +1571,18 @@ define i64 @vwreduce_uadd_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 1 x i32> %v to <vscale x 1 x i64>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 1 x i32> %v to <vscale x 1 x i64>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = zext <vscale x 1 x i32> %v to <vscale x 1 x i64>
   %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
+  %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1328,13 +1591,16 @@ declare i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64>)
 define i64 @vreduce_umax_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv1i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv1i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.umax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1343,13 +1609,16 @@ declare i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64>)
 define i64 @vreduce_smax_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv1i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv1i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.smax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1358,13 +1627,16 @@ declare i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64>)
 define i64 @vreduce_umin_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv1i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv1i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.umin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1373,13 +1645,16 @@ declare i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64>)
 define i64 @vreduce_smin_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv1i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv1i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.smin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1388,13 +1663,16 @@ declare i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64>)
 define i64 @vreduce_and_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv1i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv1i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.and.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1403,13 +1681,16 @@ declare i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64>)
 define i64 @vreduce_or_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv1i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv1i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.or.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1418,13 +1699,16 @@ declare i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64>)
 define i64 @vreduce_xor_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv1i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv1i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.xor.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1433,13 +1717,16 @@ declare i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64>)
 define i64 @vreduce_add_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv2i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv2i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1447,15 +1734,18 @@ define i64 @vwreduce_add_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
   %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
+  %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1463,15 +1753,18 @@ define i64 @vwreduce_uadd_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
   %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
+  %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1480,13 +1773,16 @@ declare i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64>)
 define i64 @vreduce_umax_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv2i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv2i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.umax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1495,13 +1791,16 @@ declare i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64>)
 define i64 @vreduce_smax_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv2i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv2i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.smax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1510,13 +1809,16 @@ declare i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64>)
 define i64 @vreduce_umin_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv2i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv2i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.umin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1525,13 +1827,16 @@ declare i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64>)
 define i64 @vreduce_smin_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv2i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv2i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.smin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1540,13 +1845,16 @@ declare i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64>)
 define i64 @vreduce_and_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv2i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv2i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.and.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1555,13 +1863,16 @@ declare i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64>)
 define i64 @vreduce_or_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv2i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv2i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.or.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1570,13 +1881,16 @@ declare i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64>)
 define i64 @vreduce_xor_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv2i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv2i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.xor.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1585,13 +1899,16 @@ declare i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64>)
 define i64 @vreduce_add_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv4i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv4i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1599,15 +1916,18 @@ define i64 @vwreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %e = sext <vscale x 4 x i32> %v to <vscale x 4 x i64>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i32> %v to <vscale x 4 x i64>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = sext <vscale x 4 x i32> %v to <vscale x 4 x i64>
   %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
+  %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1615,15 +1935,18 @@ define i64 @vwreduce_uadd_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %e = zext <vscale x 4 x i32> %v to <vscale x 4 x i64>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 4 x i32> %v to <vscale x 4 x i64>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = zext <vscale x 4 x i32> %v to <vscale x 4 x i64>
   %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
+  %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1632,13 +1955,16 @@ declare i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64>)
 define i64 @vreduce_umax_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv4i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv4i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.umax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1647,13 +1973,16 @@ declare i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64>)
 define i64 @vreduce_smax_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv4i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv4i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.smax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1662,13 +1991,16 @@ declare i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64>)
 define i64 @vreduce_umin_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv4i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv4i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.umin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1677,13 +2009,16 @@ declare i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64>)
 define i64 @vreduce_smin_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv4i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv4i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.smin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1692,13 +2027,16 @@ declare i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64>)
 define i64 @vreduce_and_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv4i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv4i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.and.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1707,13 +2045,16 @@ declare i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64>)
 define i64 @vreduce_or_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv4i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv4i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.or.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i64 %red
 }
 
@@ -1722,12 +2063,15 @@ declare i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64>)
 define i64 @vreduce_xor_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv4i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv4i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)
+  %red_vp = call i64 @llvm.vp.reduce.xor.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
   ret i64 %red
 }
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll
index aa03b02895d5f5..228fa602be0bdb 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll
@@ -1,8 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
 
 define i32 @reduce_i1(i32 %arg) {
 ; CHECK-LABEL: 'reduce_i1'
@@ -14,6 +16,14 @@ define i32 @reduce_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.xor.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.xor.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.xor.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.xor.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.xor.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.xor.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.xor.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.xor.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i1'
@@ -25,6 +35,14 @@ define i32 @reduce_i1(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.xor.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.xor.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.xor.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.xor.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.xor.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.xor.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.xor.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.xor.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
@@ -35,6 +53,15 @@ define i32 @reduce_i1(i32 %arg) {
   %V32  = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
   %V64  = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
   %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
+
+  %V1_vp   = call i1 @llvm.vp.reduce.xor.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i1 @llvm.vp.reduce.xor.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i1 @llvm.vp.reduce.xor.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i1 @llvm.vp.reduce.xor.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i1 @llvm.vp.reduce.xor.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i1 @llvm.vp.reduce.xor.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i1 @llvm.vp.reduce.xor.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i1 @llvm.vp.reduce.xor.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -48,6 +75,14 @@ define i32 @reduce_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.xor.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.xor.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.xor.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.xor.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.xor.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.xor.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.xor.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.xor.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i8'
@@ -59,6 +94,14 @@ define i32 @reduce_i8(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.xor.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.xor.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.xor.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.xor.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.xor.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.xor.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.xor.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.xor.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.xor.v1i8(<1 x i8> undef)
@@ -69,6 +112,15 @@ define i32 @reduce_i8(i32 %arg) {
   %V32  = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
   %V64  = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
   %V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef)
+
+  %V1_vp   = call i8 @llvm.vp.reduce.xor.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i8 @llvm.vp.reduce.xor.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i8 @llvm.vp.reduce.xor.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i8 @llvm.vp.reduce.xor.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i8 @llvm.vp.reduce.xor.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i8 @llvm.vp.reduce.xor.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i8 @llvm.vp.reduce.xor.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i8 @llvm.vp.reduce.xor.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -82,6 +134,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.xor.v128i16(<128 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.xor.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.xor.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.xor.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.xor.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.xor.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.xor.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.xor.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.xor.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i16'
@@ -93,6 +153,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.xor.v128i16(<128 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.xor.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.xor.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.xor.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.xor.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.xor.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.xor.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.xor.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.xor.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.xor.v1i16(<1 x i16> undef)
@@ -103,6 +171,15 @@ define i32 @reduce_i16(i32 %arg) {
   %V32  = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef)
   %V64  = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef)
   %V128 = call i16 @llvm.vector.reduce.xor.v128i16(<128 x i16> undef)
+
+  %V1_vp   = call i16 @llvm.vp.reduce.xor.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i16 @llvm.vp.reduce.xor.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i16 @llvm.vp.reduce.xor.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i16 @llvm.vp.reduce.xor.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i16 @llvm.vp.reduce.xor.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i16 @llvm.vp.reduce.xor.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i16 @llvm.vp.reduce.xor.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i16 @llvm.vp.reduce.xor.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
@@ -116,6 +193,14 @@ define i32 @reduce_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.xor.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.xor.v128i32(<128 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.xor.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.xor.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.xor.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.xor.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.xor.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.xor.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.xor.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.xor.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i32'
@@ -127,6 +212,14 @@ define i32 @reduce_i32(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.xor.v64i32(<64 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.xor.v128i32(<128 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.xor.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.xor.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.xor.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.xor.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.xor.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.xor.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.xor.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.xor.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.xor.v1i32(<1 x i32> undef)
@@ -137,32 +230,19 @@ define i32 @reduce_i32(i32 %arg) {
   %V32  = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef)
   %V64  = call i32 @llvm.vector.reduce.xor.v64i32(<64 x i32> undef)
   %V128 = call i32 @llvm.vector.reduce.xor.v128i32(<128 x i32> undef)
+
+  %V1_vp   = call i32 @llvm.vp.reduce.xor.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i32 @llvm.vp.reduce.xor.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i32 @llvm.vp.reduce.xor.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i32 @llvm.vp.reduce.xor.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i32 @llvm.vp.reduce.xor.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i32 @llvm.vp.reduce.xor.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i32 @llvm.vp.reduce.xor.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i32 @llvm.vp.reduce.xor.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
 define i32 @reduce_i64(i32 %arg) {
-; CHECK-LABEL: 'reduce_i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.xor.v32i64(<32 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.xor.v64i64(<64 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.xor.v128i64(<128 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SIZE-LABEL: 'reduce_i64'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.xor.v32i64(<32 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.xor.v64i64(<64 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.xor.v128i64(<128 x i64> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
   %V1   = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
   %V2   = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
   %V4   = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
@@ -171,6 +251,15 @@ define i32 @reduce_i64(i32 %arg) {
   %V32  = call i64 @llvm.vector.reduce.xor.v32i64(<32 x i64> undef)
   %V64  = call i64 @llvm.vector.reduce.xor.v64i64(<64 x i64> undef)
   %V128 = call i64 @llvm.vector.reduce.xor.v128i64(<128 x i64> undef)
+
+  %V1_vp   = call i64 @llvm.vp.reduce.xor.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+  %V2_vp   = call i64 @llvm.vp.reduce.xor.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+  %V4_vp   = call i64 @llvm.vp.reduce.xor.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+  %V8_vp   = call i64 @llvm.vp.reduce.xor.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+  %V16_vp  = call i64 @llvm.vp.reduce.xor.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+  %V32_vp  = call i64 @llvm.vp.reduce.xor.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+  %V64_vp  = call i64 @llvm.vp.reduce.xor.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+  %V128_vp = call i64 @llvm.vp.reduce.xor.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 

>From bca67aba598603cd82e349fed0bb368370fded09 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Tue, 29 Oct 2024 19:14:20 -0700
Subject: [PATCH 2/7] [RISCV][TTI] Implement instruction costs for vp.reduce.*.

---
 .../Target/RISCV/RISCVTargetTransformInfo.cpp |  69 +++
 .../Analysis/CostModel/RISCV/reduce-add.ll    | 166 ++++---
 .../Analysis/CostModel/RISCV/reduce-and.ll    | 174 ++++---
 .../Analysis/CostModel/RISCV/reduce-fadd.ll   | 470 +++++++++++-------
 .../CostModel/RISCV/reduce-fmaximum.ll        | 224 ++++-----
 .../CostModel/RISCV/reduce-fminimum.ll        | 112 ++---
 .../Analysis/CostModel/RISCV/reduce-fmul.ll   | 254 +++++-----
 .../Analysis/CostModel/RISCV/reduce-max.ll    | 332 ++++++++-----
 .../Analysis/CostModel/RISCV/reduce-min.ll    | 332 ++++++++-----
 .../Analysis/CostModel/RISCV/reduce-or.ll     | 178 ++++---
 .../CostModel/RISCV/reduce-scalable-fp.ll     | 268 +++++-----
 .../CostModel/RISCV/reduce-scalable-int.ll    | 456 ++++++++---------
 .../Analysis/CostModel/RISCV/reduce-xor.ll    | 166 ++++---
 .../CostModel/RISCV/rvv-intrinsics.ll         |  64 +--
 14 files changed, 1879 insertions(+), 1386 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 6344bc4664d3b6..9df72f201befc8 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1191,6 +1191,75 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     return getCmpSelInstrCost(Instruction::Select, ICA.getReturnType(),
                               ICA.getArgTypes()[0], CmpInst::BAD_ICMP_PREDICATE,
                               CostKind);
+  case Intrinsic::vp_reduce_add:
+  case Intrinsic::vp_reduce_fadd:
+  case Intrinsic::vp_reduce_mul:
+  case Intrinsic::vp_reduce_fmul:
+  case Intrinsic::vp_reduce_and:
+  case Intrinsic::vp_reduce_or:
+  case Intrinsic::vp_reduce_xor: {
+    unsigned Opcode;
+    switch (ICA.getID()) {
+    case Intrinsic::vp_reduce_add:
+      Opcode = Instruction::Add;
+      break;
+    case Intrinsic::vp_reduce_fadd:
+      Opcode = Instruction::FAdd;
+      break;
+    case Intrinsic::vp_reduce_mul:
+      Opcode = Instruction::Mul;
+      break;
+    case Intrinsic::vp_reduce_fmul:
+      Opcode = Instruction::FMul;
+      break;
+    case Intrinsic::vp_reduce_and:
+      Opcode = Instruction::And;
+      break;
+    case Intrinsic::vp_reduce_or:
+      Opcode = Instruction::Or;
+      break;
+    case Intrinsic::vp_reduce_xor:
+      Opcode = Instruction::Xor;
+      break;
+    }
+    return getArithmeticReductionCost(Opcode,
+                                      cast<VectorType>(ICA.getArgTypes()[1]),
+                                      ICA.getFlags(), CostKind);
+  }
+  case Intrinsic::vp_reduce_smax:
+  case Intrinsic::vp_reduce_smin:
+  case Intrinsic::vp_reduce_umax:
+  case Intrinsic::vp_reduce_umin:
+  case Intrinsic::vp_reduce_fmax:
+  case Intrinsic::vp_reduce_fmaximum:
+  case Intrinsic::vp_reduce_fmin:
+  case Intrinsic::vp_reduce_fminimum: {
+    unsigned IID;
+    switch (ICA.getID()) {
+    case Intrinsic::vp_reduce_smax:
+      IID = Intrinsic::smax;
+      break;
+    case Intrinsic::vp_reduce_smin:
+      IID = Intrinsic::smin;
+      break;
+    case Intrinsic::vp_reduce_umax:
+      IID = Intrinsic::umax;
+      break;
+    case Intrinsic::vp_reduce_umin:
+      IID = Intrinsic::umin;
+      break;
+    case Intrinsic::vp_reduce_fmax:
+    case Intrinsic::vp_reduce_fmaximum:
+      IID = Intrinsic::maximum;
+      break;
+    case Intrinsic::vp_reduce_fmin:
+    case Intrinsic::vp_reduce_fminimum:
+      IID = Intrinsic::minimum;
+      break;
+    }
+    return getMinMaxReductionCost(IID, cast<VectorType>(ICA.getArgTypes()[1]),
+                                  ICA.getFlags(), CostKind);
+  }
   }
 
   if (ST->hasVInstructions() && RetTy->isVectorTy()) {
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
index 1edcdecb923e57..70687da17eb1a5 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
@@ -16,14 +16,14 @@ define i32 @reduce_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.add.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.add.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.add.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.add.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.add.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.add.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.add.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.add.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.add.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.add.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.add.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.add.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.add.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.add.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.add.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.add.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i1'
@@ -35,14 +35,14 @@ define i32 @reduce_i1(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.add.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.add.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.add.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.add.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.add.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.add.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.add.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.add.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.add.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.add.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.add.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.add.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.add.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.add.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.add.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.add.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> undef)
@@ -75,14 +75,14 @@ define i32 @reduce_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.add.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.add.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.add.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.add.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i8 @llvm.vp.reduce.add.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.add.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.add.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128_vp = call i8 @llvm.vp.reduce.add.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i8'
@@ -94,14 +94,14 @@ define i32 @reduce_i8(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.add.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.add.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.add.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.add.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i8 @llvm.vp.reduce.add.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.add.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 @llvm.vp.reduce.add.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i8 @llvm.vp.reduce.add.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
@@ -134,14 +134,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.add.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.add.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.add.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.add.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.add.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.add.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.add.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.add.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.add.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.add.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.add.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i16 @llvm.vp.reduce.add.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i16 @llvm.vp.reduce.add.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i16 @llvm.vp.reduce.add.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i16 @llvm.vp.reduce.add.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call i16 @llvm.vp.reduce.add.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i16'
@@ -153,14 +153,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.add.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.add.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.add.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.add.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.add.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.add.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.add.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.add.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.add.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.add.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.add.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i16 @llvm.vp.reduce.add.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i16 @llvm.vp.reduce.add.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i16 @llvm.vp.reduce.add.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i16 @llvm.vp.reduce.add.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call i16 @llvm.vp.reduce.add.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
@@ -193,14 +193,14 @@ define i32 @reduce_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.add.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.add.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.add.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.add.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.add.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.add.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.add.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.add.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i32 @llvm.vp.reduce.add.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.add.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i32 @llvm.vp.reduce.add.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i32 @llvm.vp.reduce.add.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i32 @llvm.vp.reduce.add.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i32 @llvm.vp.reduce.add.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call i32 @llvm.vp.reduce.add.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call i32 @llvm.vp.reduce.add.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i32'
@@ -212,14 +212,14 @@ define i32 @reduce_i32(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.add.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.add.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.add.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.add.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.add.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.add.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.add.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.add.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i32 @llvm.vp.reduce.add.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.add.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i32 @llvm.vp.reduce.add.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i32 @llvm.vp.reduce.add.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i32 @llvm.vp.reduce.add.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i32 @llvm.vp.reduce.add.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call i32 @llvm.vp.reduce.add.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call i32 @llvm.vp.reduce.add.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef)
@@ -243,6 +243,44 @@ define i32 @reduce_i32(i32 %arg) {
 }
 
 define i32 @reduce_i64(i32 %arg) {
+; CHECK-LABEL: 'reduce_i64'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i64 @llvm.vp.reduce.add.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.add.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i64 @llvm.vp.reduce.add.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i64 @llvm.vp.reduce.add.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i64 @llvm.vp.reduce.add.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32_vp = call i64 @llvm.vp.reduce.add.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call i64 @llvm.vp.reduce.add.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call i64 @llvm.vp.reduce.add.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'reduce_i64'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i64 @llvm.vp.reduce.add.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.add.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i64 @llvm.vp.reduce.add.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i64 @llvm.vp.reduce.add.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i64 @llvm.vp.reduce.add.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32_vp = call i64 @llvm.vp.reduce.add.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call i64 @llvm.vp.reduce.add.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call i64 @llvm.vp.reduce.add.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
   %V1   = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
   %V2   = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
   %V4   = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
index f72298b8060631..76f2bd949c652c 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
@@ -19,17 +19,17 @@ define i32 @reduce_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.and.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.and.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.and.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.and.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.and.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.and.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.and.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.and.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14076 for instruction: %V256_vp = call i1 @llvm.vp.reduce.and.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 46584 for instruction: %V512_vp = call i1 @llvm.vp.reduce.and.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 166896 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.and.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i1 @llvm.vp.reduce.and.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i1 @llvm.vp.reduce.and.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i1 @llvm.vp.reduce.and.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i1 @llvm.vp.reduce.and.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i1 @llvm.vp.reduce.and.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i1 @llvm.vp.reduce.and.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i1 @llvm.vp.reduce.and.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i1 @llvm.vp.reduce.and.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V256_vp = call i1 @llvm.vp.reduce.and.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512_vp = call i1 @llvm.vp.reduce.and.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.and.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i1'
@@ -44,17 +44,17 @@ define i32 @reduce_i1(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.and.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.and.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.and.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.and.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.and.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.and.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.and.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.and.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3324 for instruction: %V256_vp = call i1 @llvm.vp.reduce.and.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 10744 for instruction: %V512_vp = call i1 @llvm.vp.reduce.and.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 37872 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.and.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i1 @llvm.vp.reduce.and.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i1 @llvm.vp.reduce.and.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i1 @llvm.vp.reduce.and.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i1 @llvm.vp.reduce.and.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i1 @llvm.vp.reduce.and.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i1 @llvm.vp.reduce.and.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i1 @llvm.vp.reduce.and.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i1 @llvm.vp.reduce.and.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V256_vp = call i1 @llvm.vp.reduce.and.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V512_vp = call i1 @llvm.vp.reduce.and.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.and.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
@@ -93,13 +93,13 @@ define i32 @reduce_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.and.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.and.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.and.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.and.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.and.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.and.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.and.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i8 @llvm.vp.reduce.and.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.and.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i8 @llvm.vp.reduce.and.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i8 @llvm.vp.reduce.and.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.and.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.and.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.and.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i8'
@@ -111,13 +111,13 @@ define i32 @reduce_i8(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.and.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.and.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.and.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.and.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.and.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.and.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.and.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i8 @llvm.vp.reduce.and.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.and.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i8 @llvm.vp.reduce.and.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i8 @llvm.vp.reduce.and.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.and.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.and.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 @llvm.vp.reduce.and.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> undef)
@@ -149,14 +149,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.and.v128i16(<128 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.and.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.and.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.and.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.and.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.and.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.and.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.and.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.and.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.and.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.and.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.and.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i16 @llvm.vp.reduce.and.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i16 @llvm.vp.reduce.and.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i16 @llvm.vp.reduce.and.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i16 @llvm.vp.reduce.and.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call i16 @llvm.vp.reduce.and.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i16'
@@ -168,14 +168,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.and.v128i16(<128 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.and.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.and.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.and.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.and.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.and.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.and.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.and.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.and.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.and.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.and.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.and.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i16 @llvm.vp.reduce.and.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i16 @llvm.vp.reduce.and.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i16 @llvm.vp.reduce.and.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i16 @llvm.vp.reduce.and.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call i16 @llvm.vp.reduce.and.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.and.v1i16(<1 x i16> undef)
@@ -208,14 +208,14 @@ define i32 @reduce_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.and.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.and.v128i32(<128 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.and.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.and.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.and.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.and.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.and.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.and.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.and.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.and.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i32 @llvm.vp.reduce.and.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.and.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i32 @llvm.vp.reduce.and.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i32 @llvm.vp.reduce.and.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i32 @llvm.vp.reduce.and.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i32 @llvm.vp.reduce.and.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call i32 @llvm.vp.reduce.and.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call i32 @llvm.vp.reduce.and.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i32'
@@ -227,14 +227,14 @@ define i32 @reduce_i32(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.and.v64i32(<64 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.and.v128i32(<128 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.and.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.and.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.and.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.and.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.and.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.and.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.and.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.and.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i32 @llvm.vp.reduce.and.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.and.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i32 @llvm.vp.reduce.and.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i32 @llvm.vp.reduce.and.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i32 @llvm.vp.reduce.and.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i32 @llvm.vp.reduce.and.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call i32 @llvm.vp.reduce.and.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call i32 @llvm.vp.reduce.and.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.and.v1i32(<1 x i32> undef)
@@ -258,6 +258,44 @@ define i32 @reduce_i32(i32 %arg) {
 }
 
 define i32 @reduce_i64(i32 %arg) {
+; CHECK-LABEL: 'reduce_i64'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.and.v32i64(<32 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.and.v64i64(<64 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.and.v128i64(<128 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i64 @llvm.vp.reduce.and.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.and.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i64 @llvm.vp.reduce.and.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i64 @llvm.vp.reduce.and.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i64 @llvm.vp.reduce.and.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32_vp = call i64 @llvm.vp.reduce.and.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call i64 @llvm.vp.reduce.and.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call i64 @llvm.vp.reduce.and.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'reduce_i64'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.and.v32i64(<32 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.and.v64i64(<64 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.and.v128i64(<128 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i64 @llvm.vp.reduce.and.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.and.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i64 @llvm.vp.reduce.and.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i64 @llvm.vp.reduce.and.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i64 @llvm.vp.reduce.and.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32_vp = call i64 @llvm.vp.reduce.and.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call i64 @llvm.vp.reduce.and.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call i64 @llvm.vp.reduce.and.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
   %V1   = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
   %V2   = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
   %V4   = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
index f1636785b90b81..32b62be3afedb2 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
@@ -20,13 +20,13 @@ define void @reduce_fadd_bfloat() {
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 73 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 211 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 541 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 573 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -51,13 +51,13 @@ define void @reduce_fadd_bfloat() {
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -114,20 +114,20 @@ define void @reduce_fadd_half() {
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NXV8 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NXV16 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV32 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast half @llvm.vp.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast half @llvm.vp.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast half @llvm.vp.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call fast half @llvm.vp.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call fast half @llvm.vp.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call fast half @llvm.vp.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1_vp = call fast half @llvm.vp.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV2_vp = call fast half @llvm.vp.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV4_vp = call fast half @llvm.vp.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NXV8_vp = call fast half @llvm.vp.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NXV16_vp = call fast half @llvm.vp.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV32_vp = call fast half @llvm.vp.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; FP-REDUCE-ZVFHMIN-LABEL: 'reduce_fadd_half'
@@ -146,13 +146,13 @@ define void @reduce_fadd_half() {
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 73 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 211 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 541 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 573 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast half @llvm.vp.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast half @llvm.vp.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast half @llvm.vp.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -176,20 +176,20 @@ define void @reduce_fadd_half() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV32 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast half @llvm.vp.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast half @llvm.vp.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast half @llvm.vp.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call fast half @llvm.vp.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call fast half @llvm.vp.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call fast half @llvm.vp.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1_vp = call fast half @llvm.vp.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV2_vp = call fast half @llvm.vp.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV4_vp = call fast half @llvm.vp.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV8_vp = call fast half @llvm.vp.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV16_vp = call fast half @llvm.vp.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV32_vp = call fast half @llvm.vp.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef)
@@ -239,20 +239,20 @@ define void @reduce_fadd_float() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV4 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NXV8 = call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NXV16 = call fast float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast float @llvm.vp.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast float @llvm.vp.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast float @llvm.vp.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call fast float @llvm.vp.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call fast float @llvm.vp.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call fast float @llvm.vp.reduce.fadd.nxv32f32(float 0.000000e+00, <vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call fast float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1_vp = call fast float @llvm.vp.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV2_vp = call fast float @llvm.vp.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV4_vp = call fast float @llvm.vp.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NXV8_vp = call fast float @llvm.vp.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NXV16_vp = call fast float @llvm.vp.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %NXV32_vp = call fast float @llvm.vp.reduce.fadd.nxv32f32(float 0.000000e+00, <vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fadd_float'
@@ -269,20 +269,20 @@ define void @reduce_fadd_float() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call fast float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast float @llvm.vp.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast float @llvm.vp.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast float @llvm.vp.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call fast float @llvm.vp.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call fast float @llvm.vp.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call fast float @llvm.vp.reduce.fadd.nxv32f32(float 0.000000e+00, <vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call fast float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1_vp = call fast float @llvm.vp.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV2_vp = call fast float @llvm.vp.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV4_vp = call fast float @llvm.vp.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV8_vp = call fast float @llvm.vp.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV16_vp = call fast float @llvm.vp.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV32_vp = call fast float @llvm.vp.reduce.fadd.nxv32f32(float 0.000000e+00, <vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef)
@@ -330,20 +330,20 @@ define void @reduce_fadd_double() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV2 = call fast double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV4 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NXV8 = call fast double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast double @llvm.vp.reduce.fadd.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast double @llvm.vp.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast double @llvm.vp.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call fast double @llvm.vp.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call fast double @llvm.vp.reduce.fadd.nxv16f64(double 0.000000e+00, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call fast double @llvm.vp.reduce.fadd.nxv32f64(double 0.000000e+00, <vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call fast double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1_vp = call fast double @llvm.vp.reduce.fadd.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV2_vp = call fast double @llvm.vp.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV4_vp = call fast double @llvm.vp.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NXV8_vp = call fast double @llvm.vp.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %NXV16_vp = call fast double @llvm.vp.reduce.fadd.nxv16f64(double 0.000000e+00, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %NXV32_vp = call fast double @llvm.vp.reduce.fadd.nxv32f64(double 0.000000e+00, <vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fadd_double'
@@ -359,20 +359,20 @@ define void @reduce_fadd_double() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call fast double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call fast double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast double @llvm.vp.reduce.fadd.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast double @llvm.vp.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast double @llvm.vp.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call fast double @llvm.vp.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call fast double @llvm.vp.reduce.fadd.nxv16f64(double 0.000000e+00, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call fast double @llvm.vp.reduce.fadd.nxv32f64(double 0.000000e+00, <vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call fast double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1_vp = call fast double @llvm.vp.reduce.fadd.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV2_vp = call fast double @llvm.vp.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV4_vp = call fast double @llvm.vp.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV8_vp = call fast double @llvm.vp.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV16_vp = call fast double @llvm.vp.reduce.fadd.nxv16f64(double 0.000000e+00, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NXV32_vp = call fast double @llvm.vp.reduce.fadd.nxv32f64(double 0.000000e+00, <vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef)
@@ -408,6 +408,64 @@ define void @reduce_fadd_double() {
 define void @reduce_ordered_fadd_bfloat() {
 ; FP-REDUCE-LABEL: 'reduce_ordered_fadd_bfloat'
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8 = call bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16 = call bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32 = call bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %V64 = call bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 510 for instruction: %V128 = call bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8_vp = call bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16_vp = call bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32_vp = call bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %V64_vp = call bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 510 for instruction: %V128_vp = call bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call bfloat @llvm.vp.reduce.fadd.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call bfloat @llvm.vp.reduce.fadd.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call bfloat @llvm.vp.reduce.fadd.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call bfloat @llvm.vp.reduce.fadd.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call bfloat @llvm.vp.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call bfloat @llvm.vp.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef, <vscale x 32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'reduce_ordered_fadd_bfloat'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2 = call bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V8 = call bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V16 = call bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %v32 = call bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 191 for instruction: %V64 = call bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 382 for instruction: %V128 = call bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2_vp = call bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V8_vp = call bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V16_vp = call bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %v32_vp = call bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 191 for instruction: %V64_vp = call bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 382 for instruction: %V128_vp = call bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call bfloat @llvm.vp.reduce.fadd.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call bfloat @llvm.vp.reduce.fadd.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call bfloat @llvm.vp.reduce.fadd.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call bfloat @llvm.vp.reduce.fadd.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call bfloat @llvm.vp.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call bfloat @llvm.vp.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef, <vscale x 32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0.0, <1 x bfloat> undef)
@@ -444,6 +502,68 @@ define void @reduce_ordered_fadd_bfloat() {
 
 define void @reduce_ordered_fadd_half() {
 ;
+; FP-REDUCE-ZVFH-LABEL: 'reduce_ordered_fadd_half'
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV1 = call half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NXV2 = call half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NXV4 = call half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV8 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %NXV16 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %NXV32 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2_vp = call half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4_vp = call half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8_vp = call half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16_vp = call half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v32_vp = call half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64_vp = call half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V128_vp = call half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV1_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NXV2_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NXV4_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV8_vp = call half @llvm.vp.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %NXV16_vp = call half @llvm.vp.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %NXV32_vp = call half @llvm.vp.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; FP-REDUCE-ZVFHMIN-LABEL: 'reduce_ordered_fadd_half'
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 510 for instruction: %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8_vp = call half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16_vp = call half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32_vp = call half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %V64_vp = call half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 510 for instruction: %V128_vp = call half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call half @llvm.vp.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call half @llvm.vp.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call half @llvm.vp.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
 ; SIZE-LABEL: 'reduce_ordered_fadd_half'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
@@ -459,20 +579,20 @@ define void @reduce_ordered_fadd_half() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV32 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call half @llvm.vp.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call half @llvm.vp.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call half @llvm.vp.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32_vp = call half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV2_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV4_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV8_vp = call half @llvm.vp.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV16_vp = call half @llvm.vp.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV32_vp = call half @llvm.vp.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef)
@@ -522,20 +642,20 @@ define void @reduce_ordered_fadd_float() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NXV4 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV8 = call float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %NXV16 = call float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call float @llvm.vp.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call float @llvm.vp.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call float @llvm.vp.reduce.fadd.nxv32f32(float 0.000000e+00, <vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2_vp = call float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4_vp = call float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8_vp = call float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16_vp = call float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v32_vp = call float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64_vp = call float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V128_vp = call float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV1_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NXV2_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NXV4_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV8_vp = call float @llvm.vp.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %NXV16_vp = call float @llvm.vp.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %NXV32_vp = call float @llvm.vp.reduce.fadd.nxv32f32(float 0.000000e+00, <vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_ordered_fadd_float'
@@ -552,20 +672,20 @@ define void @reduce_ordered_fadd_float() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call float @llvm.vp.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call float @llvm.vp.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call float @llvm.vp.reduce.fadd.nxv32f32(float 0.000000e+00, <vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32_vp = call float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV2_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV4_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV8_vp = call float @llvm.vp.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV16_vp = call float @llvm.vp.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV32_vp = call float @llvm.vp.reduce.fadd.nxv32f32(float 0.000000e+00, <vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef)
@@ -613,20 +733,20 @@ define void @reduce_ordered_fadd_double() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NXV2 = call double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NXV4 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV8 = call double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %v32_vp = call double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %V64_vp = call double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %V128_vp = call double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call double @llvm.vp.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call double @llvm.vp.reduce.fadd.nxv16f64(double 0.000000e+00, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call double @llvm.vp.reduce.fadd.nxv32f64(double 0.000000e+00, <vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2_vp = call double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4_vp = call double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8_vp = call double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16_vp = call double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v32_vp = call double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64_vp = call double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V128_vp = call double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV1_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NXV2_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NXV4_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV8_vp = call double @llvm.vp.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %NXV16_vp = call double @llvm.vp.reduce.fadd.nxv16f64(double 0.000000e+00, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %NXV32_vp = call double @llvm.vp.reduce.fadd.nxv32f64(double 0.000000e+00, <vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_ordered_fadd_double'
@@ -642,20 +762,20 @@ define void @reduce_ordered_fadd_double() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %v32_vp = call double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %V64_vp = call double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %V128_vp = call double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8_vp = call double @llvm.vp.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16_vp = call double @llvm.vp.reduce.fadd.nxv16f64(double 0.000000e+00, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32_vp = call double @llvm.vp.reduce.fadd.nxv32f64(double 0.000000e+00, <vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32_vp = call double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV2_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV4_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV8_vp = call double @llvm.vp.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV16_vp = call double @llvm.vp.reduce.fadd.nxv16f64(double 0.000000e+00, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NXV32_vp = call double @llvm.vp.reduce.fadd.nxv32f64(double 0.000000e+00, <vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
index dedeb4be67ae82..6e33ccaaed5c7e 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
@@ -22,34 +22,34 @@ define float @reduce_fmaximum_f32(float %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %5 = call fast float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %6 = call fast float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %7 = call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %8 = call fast float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %9 = call fast float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %10 = call fast float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %11 = call fast float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %12 = call fast float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %13 = call fast float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %14 = call fast float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %15 = call float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %16 = call float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %17 = call float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %18 = call float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %19 = call float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %20 = call float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %21 = call float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %22 = call fast float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %23 = call fast float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %24 = call fast float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %25 = call fast float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %26 = call fast float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %27 = call fast float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %28 = call fast float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2_vp = call float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4_vp = call float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8_vp = call float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16_vp = call float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32_vp = call float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V64_vp = call float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V128_vp = call float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call fast float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %9 = call fast float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %10 = call fast float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %11 = call fast float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %12 = call fast float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %13 = call fast float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %14 = call fast float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %15 = call float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %17 = call float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %18 = call float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %19 = call float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %20 = call float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %21 = call float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %22 = call fast float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %23 = call fast float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %24 = call fast float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %25 = call fast float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %26 = call fast float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %27 = call fast float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %28 = call fast float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float undef
 ;
 ; SIZE-LABEL: 'reduce_fmaximum_f32'
@@ -67,34 +67,34 @@ define float @reduce_fmaximum_f32(float %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call fast float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = call fast float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %8 = call fast float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %9 = call fast float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %10 = call fast float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %11 = call fast float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %12 = call fast float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %13 = call fast float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %14 = call fast float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %15 = call float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %16 = call float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %17 = call float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %18 = call float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %19 = call float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %20 = call float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %21 = call float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %22 = call fast float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %23 = call fast float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %24 = call fast float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %25 = call fast float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %26 = call fast float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %27 = call fast float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %28 = call fast float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4_vp = call float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8_vp = call float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16_vp = call float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64_vp = call float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V128_vp = call float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call fast float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = call fast float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %10 = call fast float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %11 = call fast float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %12 = call fast float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call fast float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = call fast float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %15 = call float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %17 = call float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %18 = call float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %19 = call float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %20 = call float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %21 = call float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %22 = call fast float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %23 = call fast float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %24 = call fast float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %25 = call fast float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %26 = call fast float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %27 = call fast float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = call fast float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float undef
 ;
 %V2   = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
@@ -165,34 +165,34 @@ define double @reduce_fmaximum_f64(double %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %4 = call fast double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %5 = call fast double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %6 = call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %7 = call double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %8 = call double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %9 = call double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %10 = call double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %11 = call double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %12 = call double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %13 = call double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %14 = call fast double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %15 = call fast double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %16 = call fast double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %17 = call fast double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %18 = call fast double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %19 = call fast double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %20 = call fast double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %21 = call double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %22 = call double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %23 = call double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %24 = call double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %25 = call double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %26 = call double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %27 = call double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %28 = call fast double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %29 = call fast double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %30 = call fast double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %31 = call fast double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %32 = call fast double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %33 = call fast double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %34 = call fast double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %7 = call double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %8 = call double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %9 = call double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %10 = call double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %11 = call double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %12 = call double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %13 = call double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = call fast double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %15 = call fast double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %16 = call fast double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %17 = call fast double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %18 = call fast double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %19 = call fast double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %20 = call fast double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %21 = call double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %22 = call double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %23 = call double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %24 = call double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %25 = call double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %26 = call double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %27 = call double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = call fast double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %29 = call fast double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %30 = call fast double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %31 = call fast double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %32 = call fast double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %33 = call fast double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %34 = call fast double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double undef
 ;
 ; SIZE-LABEL: 'reduce_fmaximum_f64'
@@ -208,34 +208,34 @@ define double @reduce_fmaximum_f64(double %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = call fast double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call fast double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %7 = call double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %8 = call double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %9 = call double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %10 = call double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %11 = call double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %12 = call double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %13 = call double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %14 = call fast double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %15 = call fast double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %16 = call fast double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %17 = call fast double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %18 = call fast double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %19 = call fast double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %20 = call fast double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %21 = call double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %22 = call double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %23 = call double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %24 = call double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %25 = call double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %26 = call double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %27 = call double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %28 = call fast double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %29 = call fast double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %30 = call fast double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %31 = call fast double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %32 = call fast double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %33 = call fast double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %34 = call fast double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %8 = call double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %9 = call double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %10 = call double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %11 = call double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %12 = call double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %13 = call double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = call fast double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = call fast double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %16 = call fast double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call fast double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = call fast double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %19 = call fast double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %20 = call fast double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %21 = call double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %22 = call double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %23 = call double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %24 = call double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %25 = call double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %26 = call double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %27 = call double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = call fast double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %29 = call fast double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %30 = call fast double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %31 = call fast double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %32 = call fast double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %33 = call fast double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %34 = call fast double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double undef
 ;
 %V2   = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
index 6d51911b4fc408..d8fd6393039282 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
@@ -15,20 +15,20 @@ define float @reduce_fmaximum_f32(float %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fminimum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fminimum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fminimum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fminimum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call float @llvm.vp.reduce.fminimum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call float @llvm.vp.reduce.fminimum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call float @llvm.vp.reduce.fminimum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = call float @llvm.vp.reduce.fmin.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %2 = call float @llvm.vp.reduce.fmin.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %3 = call float @llvm.vp.reduce.fmin.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %4 = call float @llvm.vp.reduce.fmin.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %5 = call float @llvm.vp.reduce.fmin.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %6 = call float @llvm.vp.reduce.fmin.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %7 = call float @llvm.vp.reduce.fmin.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2_vp = call float @llvm.vp.reduce.fminimum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4_vp = call float @llvm.vp.reduce.fminimum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8_vp = call float @llvm.vp.reduce.fminimum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16_vp = call float @llvm.vp.reduce.fminimum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32_vp = call float @llvm.vp.reduce.fminimum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V64_vp = call float @llvm.vp.reduce.fminimum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V128_vp = call float @llvm.vp.reduce.fminimum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %1 = call float @llvm.vp.reduce.fmin.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call float @llvm.vp.reduce.fmin.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %3 = call float @llvm.vp.reduce.fmin.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %4 = call float @llvm.vp.reduce.fmin.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %5 = call float @llvm.vp.reduce.fmin.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %6 = call float @llvm.vp.reduce.fmin.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %7 = call float @llvm.vp.reduce.fmin.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float undef
 ;
 ; SIZE-LABEL: 'reduce_fmaximum_f32'
@@ -39,20 +39,20 @@ define float @reduce_fmaximum_f32(float %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fminimum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fminimum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fminimum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fminimum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call float @llvm.vp.reduce.fminimum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call float @llvm.vp.reduce.fminimum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call float @llvm.vp.reduce.fminimum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = call float @llvm.vp.reduce.fmin.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %2 = call float @llvm.vp.reduce.fmin.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %3 = call float @llvm.vp.reduce.fmin.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %4 = call float @llvm.vp.reduce.fmin.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %5 = call float @llvm.vp.reduce.fmin.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %6 = call float @llvm.vp.reduce.fmin.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %7 = call float @llvm.vp.reduce.fmin.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call float @llvm.vp.reduce.fminimum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4_vp = call float @llvm.vp.reduce.fminimum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8_vp = call float @llvm.vp.reduce.fminimum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16_vp = call float @llvm.vp.reduce.fminimum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call float @llvm.vp.reduce.fminimum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64_vp = call float @llvm.vp.reduce.fminimum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V128_vp = call float @llvm.vp.reduce.fminimum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call float @llvm.vp.reduce.fmin.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call float @llvm.vp.reduce.fmin.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call float @llvm.vp.reduce.fmin.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call float @llvm.vp.reduce.fmin.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call float @llvm.vp.reduce.fmin.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call float @llvm.vp.reduce.fmin.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call float @llvm.vp.reduce.fmin.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float undef
 ;
 %V2   = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
@@ -96,20 +96,20 @@ define double @reduce_fmaximum_f64(double %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fminimum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fminimum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fminimum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fminimum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %V32_vp = call double @llvm.vp.reduce.fminimum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %V64_vp = call double @llvm.vp.reduce.fminimum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %V128_vp = call double @llvm.vp.reduce.fminimum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = call double @llvm.vp.reduce.fmin.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %2 = call double @llvm.vp.reduce.fmin.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %3 = call double @llvm.vp.reduce.fmin.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %4 = call double @llvm.vp.reduce.fmin.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %5 = call double @llvm.vp.reduce.fmin.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %6 = call double @llvm.vp.reduce.fmin.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %7 = call double @llvm.vp.reduce.fmin.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2_vp = call double @llvm.vp.reduce.fminimum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4_vp = call double @llvm.vp.reduce.fminimum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8_vp = call double @llvm.vp.reduce.fminimum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16_vp = call double @llvm.vp.reduce.fminimum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32_vp = call double @llvm.vp.reduce.fminimum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64_vp = call double @llvm.vp.reduce.fminimum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call double @llvm.vp.reduce.fminimum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %1 = call double @llvm.vp.reduce.fmin.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %2 = call double @llvm.vp.reduce.fmin.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %3 = call double @llvm.vp.reduce.fmin.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %4 = call double @llvm.vp.reduce.fmin.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = call double @llvm.vp.reduce.fmin.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = call double @llvm.vp.reduce.fmin.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %7 = call double @llvm.vp.reduce.fmin.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double undef
 ;
 ; SIZE-LABEL: 'reduce_fmaximum_f64'
@@ -119,20 +119,20 @@ define double @reduce_fmaximum_f64(double %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fminimum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fminimum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fminimum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fminimum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %V32_vp = call double @llvm.vp.reduce.fminimum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %V64_vp = call double @llvm.vp.reduce.fminimum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %V128_vp = call double @llvm.vp.reduce.fminimum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = call double @llvm.vp.reduce.fmin.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %2 = call double @llvm.vp.reduce.fmin.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %3 = call double @llvm.vp.reduce.fmin.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %4 = call double @llvm.vp.reduce.fmin.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %5 = call double @llvm.vp.reduce.fmin.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %6 = call double @llvm.vp.reduce.fmin.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %7 = call double @llvm.vp.reduce.fmin.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call double @llvm.vp.reduce.fminimum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4_vp = call double @llvm.vp.reduce.fminimum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8_vp = call double @llvm.vp.reduce.fminimum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16_vp = call double @llvm.vp.reduce.fminimum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call double @llvm.vp.reduce.fminimum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64_vp = call double @llvm.vp.reduce.fminimum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V128_vp = call double @llvm.vp.reduce.fminimum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call double @llvm.vp.reduce.fmin.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call double @llvm.vp.reduce.fmin.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call double @llvm.vp.reduce.fmin.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call double @llvm.vp.reduce.fmin.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call double @llvm.vp.reduce.fmin.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call double @llvm.vp.reduce.fmin.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call double @llvm.vp.reduce.fmin.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double undef
 ;
 %V2   = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll
index dc43a54ff1855b..35a5343b6cc65d 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll
@@ -20,13 +20,13 @@ define void @reduce_fmul_bfloat() {
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 73 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 211 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 541 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 573 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -51,13 +51,13 @@ define void @reduce_fmul_bfloat() {
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -115,13 +115,13 @@ define void @reduce_fmul_half() {
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 49 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 151 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 541 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 573 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast half @llvm.vp.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast half @llvm.vp.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast half @llvm.vp.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -146,13 +146,13 @@ define void @reduce_fmul_half() {
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 73 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 211 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 541 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 573 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast half @llvm.vp.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast half @llvm.vp.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast half @llvm.vp.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -177,13 +177,13 @@ define void @reduce_fmul_half() {
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast half @llvm.vp.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast half @llvm.vp.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast half @llvm.vp.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -240,13 +240,13 @@ define void @reduce_fmul_float() {
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast float @llvm.vp.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 121 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 451 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 483 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 547 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast float @llvm.vp.reduce.fmul.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast float @llvm.vp.reduce.fmul.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast float @llvm.vp.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -270,13 +270,13 @@ define void @reduce_fmul_float() {
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast float @llvm.vp.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast float @llvm.vp.reduce.fmul.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast float @llvm.vp.reduce.fmul.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast float @llvm.vp.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -331,13 +331,13 @@ define void @reduce_fmul_double() {
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast double @llvm.vp.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 91 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 361 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 393 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 457 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 585 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast double @llvm.vp.reduce.fmul.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast double @llvm.vp.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast double @llvm.vp.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -360,13 +360,13 @@ define void @reduce_fmul_double() {
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast double @llvm.vp.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call fast double @llvm.vp.reduce.fmul.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call fast double @llvm.vp.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call fast double @llvm.vp.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -421,14 +421,14 @@ define void @reduce_ordered_fmul_bfloat() {
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8_vp = call bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16_vp = call bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32_vp = call bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %V64_vp = call bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 510 for instruction: %V128_vp = call bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call bfloat @llvm.vp.reduce.fmul.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call bfloat @llvm.vp.reduce.fmul.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call bfloat @llvm.vp.reduce.fmul.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -452,14 +452,14 @@ define void @reduce_ordered_fmul_bfloat() {
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2_vp = call bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V8_vp = call bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V16_vp = call bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %v32_vp = call bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 191 for instruction: %V64_vp = call bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 382 for instruction: %V128_vp = call bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call bfloat @llvm.vp.reduce.fmul.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call bfloat @llvm.vp.reduce.fmul.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call bfloat @llvm.vp.reduce.fmul.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -516,14 +516,14 @@ define void @reduce_ordered_fmul_half() {
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8_vp = call half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16_vp = call half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32_vp = call half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %V64_vp = call half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 510 for instruction: %V128_vp = call half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call half @llvm.vp.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call half @llvm.vp.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call half @llvm.vp.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -547,14 +547,14 @@ define void @reduce_ordered_fmul_half() {
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2_vp = call half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V8_vp = call half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V16_vp = call half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %v32_vp = call half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 191 for instruction: %V64_vp = call half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 382 for instruction: %V128_vp = call half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call half @llvm.vp.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call half @llvm.vp.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call half @llvm.vp.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -610,14 +610,14 @@ define void @reduce_ordered_fmul_float() {
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call float @llvm.vp.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call float @llvm.vp.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8_vp = call float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16_vp = call float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32_vp = call float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 254 for instruction: %V64_vp = call float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 508 for instruction: %V128_vp = call float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call float @llvm.vp.reduce.fmul.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call float @llvm.vp.reduce.fmul.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call float @llvm.vp.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -639,14 +639,14 @@ define void @reduce_ordered_fmul_float() {
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call float @llvm.vp.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call float @llvm.vp.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2_vp = call float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V8_vp = call float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V16_vp = call float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %v32_vp = call float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 190 for instruction: %V64_vp = call float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 380 for instruction: %V128_vp = call float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call float @llvm.vp.reduce.fmul.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call float @llvm.vp.reduce.fmul.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call float @llvm.vp.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -698,14 +698,14 @@ define void @reduce_ordered_fmul_double() {
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call double @llvm.vp.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 285 for instruction: %v32_vp = call double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 827 for instruction: %V64_vp = call double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 2679 for instruction: %V128_vp = call double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call double @llvm.vp.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8_vp = call double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16_vp = call double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 126 for instruction: %v32_vp = call double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 252 for instruction: %V64_vp = call double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 504 for instruction: %V128_vp = call double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call double @llvm.vp.reduce.fmul.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call double @llvm.vp.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call double @llvm.vp.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -725,14 +725,14 @@ define void @reduce_ordered_fmul_double() {
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call double @llvm.vp.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 221 for instruction: %v32_vp = call double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 443 for instruction: %V64_vp = call double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 887 for instruction: %V128_vp = call double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call double @llvm.vp.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2_vp = call double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V8_vp = call double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V16_vp = call double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %v32_vp = call double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 188 for instruction: %V64_vp = call double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 376 for instruction: %V128_vp = call double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1_vp = call double @llvm.vp.reduce.fmul.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2_vp = call double @llvm.vp.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4_vp = call double @llvm.vp.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
index c037eb5bfc17f4..f77e94cd333aa5 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
@@ -16,14 +16,14 @@ define i32 @reduce_umin_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.umax.v32i1(<32 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.umax.v64i1(<64 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.umax.v128i1(<128 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umin_i1'
@@ -35,14 +35,14 @@ define i32 @reduce_umin_i1(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.umax.v32i1(<32 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.umax.v64i1(<64 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.umax.v128i1(<128 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.umax.v1i1(<1 x i1> undef)
@@ -75,14 +75,14 @@ define i32 @reduce_umax_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umax_i8'
@@ -94,14 +94,14 @@ define i32 @reduce_umax_i8(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> undef)
@@ -133,14 +133,14 @@ define i32 @reduce_umax_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umax_i16'
@@ -151,14 +151,14 @@ define i32 @reduce_umax_i16(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.umax.v1i16(<1 x i16> undef)
@@ -190,14 +190,14 @@ define i32 @reduce_umax_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.umax.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.umax.v128i32(<128 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umax_i32'
@@ -209,14 +209,14 @@ define i32 @reduce_umax_i32(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.umax.v64i32(<64 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.umax.v128i32(<128 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.umax.v1i32(<1 x i32> undef)
@@ -240,6 +240,44 @@ define i32 @reduce_umax_i32(i32 %arg) {
 }
 
 define i32 @reduce_umax_i64(i32 %arg) {
+; CHECK-LABEL: 'reduce_umax_i64'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.umax.v32i64(<32 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.umax.v64i64(<64 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.umax.v128i64(<128 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i64 @llvm.vp.reduce.umax.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.umax.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i64 @llvm.vp.reduce.umax.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i64 @llvm.vp.reduce.umax.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i64 @llvm.vp.reduce.umax.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32_vp = call i64 @llvm.vp.reduce.umax.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call i64 @llvm.vp.reduce.umax.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call i64 @llvm.vp.reduce.umax.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'reduce_umax_i64'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.umax.v32i64(<32 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.umax.v64i64(<64 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.umax.v128i64(<128 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i64 @llvm.vp.reduce.umax.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.umax.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i64 @llvm.vp.reduce.umax.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i64 @llvm.vp.reduce.umax.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i64 @llvm.vp.reduce.umax.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32_vp = call i64 @llvm.vp.reduce.umax.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call i64 @llvm.vp.reduce.umax.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call i64 @llvm.vp.reduce.umax.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
   %V1   = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
   %V2   = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
   %V4   = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
@@ -270,14 +308,14 @@ define i32 @reduce_smin_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.smax.v32i1(<32 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.smax.v64i1(<64 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.smax.v128i1(<128 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smin_i1'
@@ -289,14 +327,14 @@ define i32 @reduce_smin_i1(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.smax.v32i1(<32 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.smax.v64i1(<64 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.smax.v128i1(<128 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> undef)
@@ -329,14 +367,14 @@ define i32 @reduce_smax_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smax_i8'
@@ -348,14 +386,14 @@ define i32 @reduce_smax_i8(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.smax.v1i8(<1 x i8> undef)
@@ -388,14 +426,14 @@ define i32 @reduce_smax_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smax_i16'
@@ -407,14 +445,14 @@ define i32 @reduce_smax_i16(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.smax.v1i16(<1 x i16> undef)
@@ -447,14 +485,14 @@ define i32 @reduce_smax_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smax_i32'
@@ -466,14 +504,14 @@ define i32 @reduce_smax_i32(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.smax.v1i32(<1 x i32> undef)
@@ -497,6 +535,44 @@ define i32 @reduce_smax_i32(i32 %arg) {
 }
 
 define i32 @reduce_smax_i64(i32 %arg) {
+; CHECK-LABEL: 'reduce_smax_i64'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.smax.v128i64(<128 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i64 @llvm.vp.reduce.smax.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.smax.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i64 @llvm.vp.reduce.smax.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i64 @llvm.vp.reduce.smax.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i64 @llvm.vp.reduce.smax.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32_vp = call i64 @llvm.vp.reduce.smax.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call i64 @llvm.vp.reduce.smax.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call i64 @llvm.vp.reduce.smax.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'reduce_smax_i64'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.smax.v128i64(<128 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i64 @llvm.vp.reduce.smax.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.smax.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i64 @llvm.vp.reduce.smax.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i64 @llvm.vp.reduce.smax.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i64 @llvm.vp.reduce.smax.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32_vp = call i64 @llvm.vp.reduce.smax.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call i64 @llvm.vp.reduce.smax.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call i64 @llvm.vp.reduce.smax.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
   %V1   = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
   %V2   = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
   %V4   = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
index 858c053f0c3ef3..3e6a19e86a904a 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
@@ -16,14 +16,14 @@ define i32 @reduce_umin_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.umin.v32i1(<32 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.umin.v64i1(<64 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.umin.v128i1(<128 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umin_i1'
@@ -35,14 +35,14 @@ define i32 @reduce_umin_i1(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.umin.v32i1(<32 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.umin.v64i1(<64 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.umin.v128i1(<128 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.umin.v1i1(<1 x i1> undef)
@@ -75,14 +75,14 @@ define i32 @reduce_umin_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umin_i8'
@@ -94,14 +94,14 @@ define i32 @reduce_umin_i8(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.umin.v1i8(<1 x i8> undef)
@@ -134,14 +134,14 @@ define i32 @reduce_umin_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.umin.v128i16(<128 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umin_i16'
@@ -153,14 +153,14 @@ define i32 @reduce_umin_i16(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.umin.v128i16(<128 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.umin.v1i16(<1 x i16> undef)
@@ -193,14 +193,14 @@ define i32 @reduce_umin_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.umin.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.umin.v128i32(<128 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_umin_i32'
@@ -212,14 +212,14 @@ define i32 @reduce_umin_i32(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.umin.v64i32(<64 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.umin.v128i32(<128 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.umin.v1i32(<1 x i32> undef)
@@ -243,6 +243,44 @@ define i32 @reduce_umin_i32(i32 %arg) {
 }
 
 define i32 @reduce_umin_i64(i32 %arg) {
+; CHECK-LABEL: 'reduce_umin_i64'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.umin.v32i64(<32 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.umin.v64i64(<64 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.umin.v128i64(<128 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i64 @llvm.vp.reduce.umin.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.umin.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i64 @llvm.vp.reduce.umin.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i64 @llvm.vp.reduce.umin.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i64 @llvm.vp.reduce.umin.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32_vp = call i64 @llvm.vp.reduce.umin.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call i64 @llvm.vp.reduce.umin.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call i64 @llvm.vp.reduce.umin.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'reduce_umin_i64'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.umin.v32i64(<32 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.umin.v64i64(<64 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.umin.v128i64(<128 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i64 @llvm.vp.reduce.umin.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.umin.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i64 @llvm.vp.reduce.umin.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i64 @llvm.vp.reduce.umin.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i64 @llvm.vp.reduce.umin.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32_vp = call i64 @llvm.vp.reduce.umin.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call i64 @llvm.vp.reduce.umin.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call i64 @llvm.vp.reduce.umin.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
   %V1   = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
   %V2   = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
   %V4   = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
@@ -273,14 +311,14 @@ define i32 @reduce_smin_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.smin.v32i1(<32 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.smin.v64i1(<64 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.smin.v128i1(<128 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smin_i1'
@@ -292,14 +330,14 @@ define i32 @reduce_smin_i1(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.smin.v32i1(<32 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.smin.v64i1(<64 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.smin.v128i1(<128 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.smin.v1i1(<1 x i1> undef)
@@ -332,14 +370,14 @@ define i32 @reduce_smin_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smin_i8'
@@ -351,14 +389,14 @@ define i32 @reduce_smin_i8(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.smin.v1i8(<1 x i8> undef)
@@ -391,14 +429,14 @@ define i32 @reduce_smin_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.smin.v128i16(<128 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smin_i16'
@@ -410,14 +448,14 @@ define i32 @reduce_smin_i16(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.smin.v128i16(<128 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.smin.v1i16(<1 x i16> undef)
@@ -450,14 +488,14 @@ define i32 @reduce_smin_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.smin.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.smin.v128i32(<128 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_smin_i32'
@@ -469,14 +507,14 @@ define i32 @reduce_smin_i32(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.smin.v64i32(<64 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.smin.v128i32(<128 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.smin.v1i32(<1 x i32> undef)
@@ -500,6 +538,44 @@ define i32 @reduce_smin_i32(i32 %arg) {
 }
 
 define i32 @reduce_smin_i64(i32 %arg) {
+; CHECK-LABEL: 'reduce_smin_i64'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.smin.v32i64(<32 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.smin.v64i64(<64 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.smin.v128i64(<128 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i64 @llvm.vp.reduce.smin.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.smin.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i64 @llvm.vp.reduce.smin.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i64 @llvm.vp.reduce.smin.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i64 @llvm.vp.reduce.smin.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32_vp = call i64 @llvm.vp.reduce.smin.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call i64 @llvm.vp.reduce.smin.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call i64 @llvm.vp.reduce.smin.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'reduce_smin_i64'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.smin.v32i64(<32 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.smin.v64i64(<64 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.smin.v128i64(<128 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i64 @llvm.vp.reduce.smin.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.smin.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i64 @llvm.vp.reduce.smin.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i64 @llvm.vp.reduce.smin.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i64 @llvm.vp.reduce.smin.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32_vp = call i64 @llvm.vp.reduce.smin.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call i64 @llvm.vp.reduce.smin.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call i64 @llvm.vp.reduce.smin.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
   %V1   = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
   %V2   = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
   %V4   = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
index 2db77240fc3fcf..69805e79641011 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
@@ -19,17 +19,17 @@ define i32 @reduce_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V256 = call i1 @llvm.vector.reduce.or.v256i1(<256 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V512 = call i1 @llvm.vector.reduce.or.v512i1(<512 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V1024 = call i1 @llvm.vector.reduce.or.v1024i1(<1024 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.or.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.or.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.or.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.or.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.or.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.or.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.or.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.or.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14076 for instruction: %V256_vp = call i1 @llvm.vp.reduce.or.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 46584 for instruction: %V512_vp = call i1 @llvm.vp.reduce.or.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 166896 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.or.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.or.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.or.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.or.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.or.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.or.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.or.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.or.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.or.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V256_vp = call i1 @llvm.vp.reduce.or.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V512_vp = call i1 @llvm.vp.reduce.or.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.or.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i1'
@@ -44,17 +44,17 @@ define i32 @reduce_i1(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V256 = call i1 @llvm.vector.reduce.or.v256i1(<256 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V512 = call i1 @llvm.vector.reduce.or.v512i1(<512 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V1024 = call i1 @llvm.vector.reduce.or.v1024i1(<1024 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.or.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.or.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.or.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.or.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.or.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.or.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.or.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.or.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3324 for instruction: %V256_vp = call i1 @llvm.vp.reduce.or.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 10744 for instruction: %V512_vp = call i1 @llvm.vp.reduce.or.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 37872 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.or.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.or.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.or.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.or.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.or.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.or.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.or.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.or.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.or.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V256_vp = call i1 @llvm.vp.reduce.or.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V512_vp = call i1 @llvm.vp.reduce.or.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.or.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef)
@@ -93,14 +93,14 @@ define i32 @reduce_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.or.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.or.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.or.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.or.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.or.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.or.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.or.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.or.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i8 @llvm.vp.reduce.or.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.or.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i8 @llvm.vp.reduce.or.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i8 @llvm.vp.reduce.or.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.or.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.or.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.or.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128_vp = call i8 @llvm.vp.reduce.or.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i8'
@@ -112,14 +112,14 @@ define i32 @reduce_i8(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.or.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.or.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.or.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.or.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.or.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.or.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.or.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.or.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i8 @llvm.vp.reduce.or.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.or.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i8 @llvm.vp.reduce.or.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i8 @llvm.vp.reduce.or.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.or.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.or.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 @llvm.vp.reduce.or.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i8 @llvm.vp.reduce.or.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> undef)
@@ -152,14 +152,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.or.v128i16(<128 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.or.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.or.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.or.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.or.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.or.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.or.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.or.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.or.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.or.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.or.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.or.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i16 @llvm.vp.reduce.or.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i16 @llvm.vp.reduce.or.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i16 @llvm.vp.reduce.or.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i16 @llvm.vp.reduce.or.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call i16 @llvm.vp.reduce.or.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i16'
@@ -171,14 +171,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.or.v128i16(<128 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.or.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.or.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.or.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.or.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.or.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.or.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.or.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.or.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.or.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.or.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.or.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i16 @llvm.vp.reduce.or.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i16 @llvm.vp.reduce.or.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i16 @llvm.vp.reduce.or.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i16 @llvm.vp.reduce.or.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call i16 @llvm.vp.reduce.or.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.or.v1i16(<1 x i16> undef)
@@ -211,14 +211,14 @@ define i32 @reduce_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.or.v128i32(<128 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.or.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.or.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.or.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.or.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.or.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.or.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.or.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.or.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i32 @llvm.vp.reduce.or.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.or.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i32 @llvm.vp.reduce.or.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i32 @llvm.vp.reduce.or.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i32 @llvm.vp.reduce.or.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i32 @llvm.vp.reduce.or.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call i32 @llvm.vp.reduce.or.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call i32 @llvm.vp.reduce.or.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i32'
@@ -230,14 +230,14 @@ define i32 @reduce_i32(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.or.v128i32(<128 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.or.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.or.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.or.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.or.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.or.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.or.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.or.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.or.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i32 @llvm.vp.reduce.or.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.or.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i32 @llvm.vp.reduce.or.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i32 @llvm.vp.reduce.or.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i32 @llvm.vp.reduce.or.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i32 @llvm.vp.reduce.or.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call i32 @llvm.vp.reduce.or.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call i32 @llvm.vp.reduce.or.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.or.v1i32(<1 x i32> undef)
@@ -261,6 +261,44 @@ define i32 @reduce_i32(i32 %arg) {
 }
 
 define i32 @reduce_i64(i32 %arg) {
+; CHECK-LABEL: 'reduce_i64'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.or.v32i64(<32 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.or.v64i64(<64 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.or.v128i64(<128 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i64 @llvm.vp.reduce.or.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.or.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i64 @llvm.vp.reduce.or.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i64 @llvm.vp.reduce.or.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i64 @llvm.vp.reduce.or.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32_vp = call i64 @llvm.vp.reduce.or.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call i64 @llvm.vp.reduce.or.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call i64 @llvm.vp.reduce.or.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'reduce_i64'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.or.v32i64(<32 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.or.v64i64(<64 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.or.v128i64(<128 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i64 @llvm.vp.reduce.or.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.or.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i64 @llvm.vp.reduce.or.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i64 @llvm.vp.reduce.or.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i64 @llvm.vp.reduce.or.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32_vp = call i64 @llvm.vp.reduce.or.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call i64 @llvm.vp.reduce.or.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call i64 @llvm.vp.reduce.or.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
   %V1   = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
   %V2   = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
   %V4   = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
index 7f1ff31f594348..134f75d6b4692d 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
@@ -11,12 +11,12 @@ declare half @llvm.vector.reduce.fadd.nxv1f16(half, <vscale x 1 x half>)
 define half @vreduce_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv1f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv1f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call reassoc half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
@@ -27,12 +27,12 @@ define half @vreduce_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
 define half @vreduce_ord_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv1f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv1f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
@@ -45,12 +45,12 @@ declare half @llvm.vector.reduce.fadd.nxv2f16(half, <vscale x 2 x half>)
 define half @vreduce_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv2f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv2f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call reassoc half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
@@ -61,12 +61,12 @@ define half @vreduce_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
 define half @vreduce_ord_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv2f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv2f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
@@ -79,12 +79,12 @@ declare half @llvm.vector.reduce.fadd.nxv4f16(half, <vscale x 4 x half>)
 define half @vreduce_fadd_nxv4f16(<vscale x 4 x half> %v, half %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv4f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv4f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call reassoc half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
@@ -95,12 +95,12 @@ define half @vreduce_fadd_nxv4f16(<vscale x 4 x half> %v, half %s) {
 define half @vreduce_ord_fadd_nxv4f16(<vscale x 4 x half> %v, half %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv4f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv4f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
@@ -113,12 +113,12 @@ declare float @llvm.vector.reduce.fadd.nxv1f32(float, <vscale x 1 x float>)
 define float @vreduce_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv1f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
@@ -129,12 +129,12 @@ define float @vreduce_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
 define float @vreduce_ord_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv1f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
@@ -146,13 +146,13 @@ define float @vreduce_fwadd_nxv1f32(<vscale x 1 x half> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_fwadd_nxv1f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
@@ -165,13 +165,13 @@ define float @vreduce_ord_fwadd_nxv1f32(<vscale x 1 x half> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv1f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
@@ -185,12 +185,12 @@ declare float @llvm.vector.reduce.fadd.nxv2f32(float, <vscale x 2 x float>)
 define float @vreduce_fadd_nxv2f32(<vscale x 2 x float> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv2f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv2f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
@@ -201,12 +201,12 @@ define float @vreduce_fadd_nxv2f32(<vscale x 2 x float> %v, float %s) {
 define float @vreduce_ord_fadd_nxv2f32(<vscale x 2 x float> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv2f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv2f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
@@ -218,13 +218,13 @@ define float @vreduce_fwadd_nxv2f32(<vscale x 2 x half> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_fwadd_nxv2f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv2f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
@@ -237,13 +237,13 @@ define float @vreduce_ord_fwadd_nxv2f32(<vscale x 2 x half> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv2f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv2f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
@@ -257,12 +257,12 @@ declare float @llvm.vector.reduce.fadd.nxv4f32(float, <vscale x 4 x float>)
 define float @vreduce_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv4f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv4f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
@@ -273,12 +273,12 @@ define float @vreduce_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
 define float @vreduce_ord_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv4f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv4f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
@@ -290,13 +290,13 @@ define float @vreduce_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_fwadd_nxv4f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv4f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
@@ -309,13 +309,13 @@ define float @vreduce_ord_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv4f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv4f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
@@ -329,12 +329,12 @@ declare double @llvm.vector.reduce.fadd.nxv1f64(double, <vscale x 1 x double>)
 define double @vreduce_fadd_nxv1f64(<vscale x 1 x double> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv1f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv1f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
@@ -345,12 +345,12 @@ define double @vreduce_fadd_nxv1f64(<vscale x 1 x double> %v, double %s) {
 define double @vreduce_ord_fadd_nxv1f64(<vscale x 1 x double> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv1f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv1f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
@@ -362,13 +362,13 @@ define double @vreduce_fwadd_nxv1f64(<vscale x 1 x float> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_fwadd_nxv1f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv1f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
@@ -381,13 +381,13 @@ define double @vreduce_ord_fwadd_nxv1f64(<vscale x 1 x float> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv1f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv1f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
@@ -401,12 +401,12 @@ declare double @llvm.vector.reduce.fadd.nxv2f64(double, <vscale x 2 x double>)
 define double @vreduce_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv2f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv2f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
@@ -417,12 +417,12 @@ define double @vreduce_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
 define double @vreduce_ord_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv2f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv2f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
@@ -434,13 +434,13 @@ define double @vreduce_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_fwadd_nxv2f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv2f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
@@ -453,13 +453,13 @@ define double @vreduce_ord_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv2f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv2f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
@@ -473,12 +473,12 @@ declare double @llvm.vector.reduce.fadd.nxv4f64(double, <vscale x 4 x double>)
 define double @vreduce_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_fadd_nxv4f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fadd_nxv4f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
@@ -489,12 +489,12 @@ define double @vreduce_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
 define double @vreduce_ord_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_ord_fadd_nxv4f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fadd_nxv4f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
@@ -506,13 +506,13 @@ define double @vreduce_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_fwadd_nxv4f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fwadd_nxv4f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
@@ -525,13 +525,13 @@ define double @vreduce_ord_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv4f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv4f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
@@ -545,12 +545,12 @@ declare half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half>)
 define half @vreduce_fmin_nxv1f16(<vscale x 1 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
@@ -561,12 +561,12 @@ define half @vreduce_fmin_nxv1f16(<vscale x 1 x half> %v) {
 define half @vreduce_fmin_nxv1f16_nonans(<vscale x 1 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f16_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f16_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
@@ -577,12 +577,12 @@ define half @vreduce_fmin_nxv1f16_nonans(<vscale x 1 x half> %v) {
 define half @vreduce_fmin_nxv1f16_nonans_noinfs(<vscale x 1 x half> %v) #1 {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f16_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f16_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
@@ -595,12 +595,12 @@ declare half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half>)
 define half @vreduce_fmin_nxv2f16(<vscale x 2 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv2f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv2f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %v)
@@ -613,12 +613,12 @@ declare half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half>)
 define half @vreduce_fmin_nxv4f16(<vscale x 4 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv4f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv4f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %v)
@@ -631,12 +631,12 @@ declare half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half>)
 define half @vreduce_fmin_nxv64f16(<vscale x 64 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv64f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv64f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
@@ -649,12 +649,12 @@ declare float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float>)
 define float @vreduce_fmin_nxv1f32(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
@@ -665,12 +665,12 @@ define float @vreduce_fmin_nxv1f32(<vscale x 1 x float> %v) {
 define float @vreduce_fmin_nxv1f32_nonans(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f32_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f32_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
@@ -681,12 +681,12 @@ define float @vreduce_fmin_nxv1f32_nonans(<vscale x 1 x float> %v) {
 define float @vreduce_fmin_nxv1f32_nonans_noinfs(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f32_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f32_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
@@ -699,12 +699,12 @@ declare float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float>)
 define float @vreduce_fmin_nxv2f32(<vscale x 2 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv2f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv2f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %v)
@@ -717,12 +717,12 @@ declare float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float>)
 define float @vreduce_fmin_nxv4f32(<vscale x 4 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv4f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv4f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
@@ -735,12 +735,12 @@ declare float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float>)
 define float @vreduce_fmin_nxv32f32(<vscale x 32 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv32f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv32f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
@@ -753,12 +753,12 @@ declare double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double>)
 define double @vreduce_fmin_nxv1f64(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
@@ -769,12 +769,12 @@ define double @vreduce_fmin_nxv1f64(<vscale x 1 x double> %v) {
 define double @vreduce_fmin_nxv1f64_nonans(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f64_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f64_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
@@ -785,12 +785,12 @@ define double @vreduce_fmin_nxv1f64_nonans(<vscale x 1 x double> %v) {
 define double @vreduce_fmin_nxv1f64_nonans_noinfs(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f64_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f64_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
@@ -803,12 +803,12 @@ declare double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double>)
 define double @vreduce_fmin_nxv2f64(<vscale x 2 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv2f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv2f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %v)
@@ -821,12 +821,12 @@ declare double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double>)
 define double @vreduce_fmin_nxv4f64(<vscale x 4 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv4f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv4f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
@@ -839,12 +839,12 @@ declare double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double>)
 define double @vreduce_fmin_nxv16f64(<vscale x 16 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv16f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv16f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
@@ -857,12 +857,12 @@ declare half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half>)
 define half @vreduce_fmax_nxv1f16(<vscale x 1 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
@@ -873,12 +873,12 @@ define half @vreduce_fmax_nxv1f16(<vscale x 1 x half> %v) {
 define half @vreduce_fmax_nxv1f16_nonans(<vscale x 1 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f16_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f16_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
@@ -889,12 +889,12 @@ define half @vreduce_fmax_nxv1f16_nonans(<vscale x 1 x half> %v) {
 define half @vreduce_fmax_nxv1f16_nonans_noinfs(<vscale x 1 x half> %v) #1 {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f16_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f16_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
@@ -907,12 +907,12 @@ declare half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half>)
 define half @vreduce_fmax_nxv2f16(<vscale x 2 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv2f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv2f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %v)
@@ -925,12 +925,12 @@ declare half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half>)
 define half @vreduce_fmax_nxv4f16(<vscale x 4 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv4f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv4f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %v)
@@ -943,12 +943,12 @@ declare half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half>)
 define half @vreduce_fmax_nxv64f16(<vscale x 64 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv64f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv64f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
@@ -961,12 +961,12 @@ declare float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float>)
 define float @vreduce_fmax_nxv1f32(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
@@ -977,12 +977,12 @@ define float @vreduce_fmax_nxv1f32(<vscale x 1 x float> %v) {
 define float @vreduce_fmax_nxv1f32_nonans(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f32_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f32_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
@@ -993,12 +993,12 @@ define float @vreduce_fmax_nxv1f32_nonans(<vscale x 1 x float> %v) {
 define float @vreduce_fmax_nxv1f32_nonans_noinfs(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f32_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f32_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
@@ -1011,12 +1011,12 @@ declare float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float>)
 define float @vreduce_fmax_nxv2f32(<vscale x 2 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv2f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv2f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %v)
@@ -1029,12 +1029,12 @@ declare float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float>)
 define float @vreduce_fmax_nxv4f32(<vscale x 4 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv4f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv4f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
@@ -1047,12 +1047,12 @@ declare float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float>)
 define float @vreduce_fmax_nxv32f32(<vscale x 32 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv32f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv32f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
@@ -1065,12 +1065,12 @@ declare double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double>)
 define double @vreduce_fmax_nxv1f64(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
@@ -1081,12 +1081,12 @@ define double @vreduce_fmax_nxv1f64(<vscale x 1 x double> %v) {
 define double @vreduce_fmax_nxv1f64_nonans(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f64_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f64_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
@@ -1097,12 +1097,12 @@ define double @vreduce_fmax_nxv1f64_nonans(<vscale x 1 x double> %v) {
 define double @vreduce_fmax_nxv1f64_nonans_noinfs(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f64_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f64_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
@@ -1115,12 +1115,12 @@ declare double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double>)
 define double @vreduce_fmax_nxv2f64(<vscale x 2 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv2f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv2f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %v)
@@ -1133,12 +1133,12 @@ declare double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double>)
 define double @vreduce_fmax_nxv4f64(<vscale x 4 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv4f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv4f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
@@ -1151,12 +1151,12 @@ declare double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double>)
 define double @vreduce_fmax_nxv16f64(<vscale x 16 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv16f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv16f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)
@@ -1167,12 +1167,12 @@ define double @vreduce_fmax_nxv16f64(<vscale x 16 x double> %v) {
 define float @vreduce_nsz_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_nsz_fadd_nxv1f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc nsz float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc nsz float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_nsz_fadd_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call reassoc nsz float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc nsz float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll
index b565cc9ac3af4a..6cd817a93552b7 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll
@@ -11,12 +11,12 @@ declare i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8>)
 define signext i8 @vreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
@@ -29,12 +29,12 @@ declare i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8>)
 define signext i8 @vreduce_umax_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8> %v)
@@ -47,12 +47,12 @@ declare i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8>)
 define signext i8 @vreduce_smax_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> %v)
@@ -65,12 +65,12 @@ declare i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8>)
 define signext i8 @vreduce_umin_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8> %v)
@@ -83,12 +83,12 @@ declare i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8>)
 define signext i8 @vreduce_smin_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8> %v)
@@ -101,12 +101,12 @@ declare i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8>)
 define signext i8 @vreduce_and_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8> %v)
@@ -119,12 +119,12 @@ declare i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8>)
 define signext i8 @vreduce_or_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> %v)
@@ -137,12 +137,12 @@ declare i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8>)
 define signext i8 @vreduce_xor_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8> %v)
@@ -155,12 +155,12 @@ declare i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8>)
 define signext i8 @vreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %v)
@@ -173,12 +173,12 @@ declare i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8>)
 define signext i8 @vreduce_umax_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8> %v)
@@ -191,12 +191,12 @@ declare i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8>)
 define signext i8 @vreduce_smax_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> %v)
@@ -209,12 +209,12 @@ declare i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8>)
 define signext i8 @vreduce_umin_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8> %v)
@@ -227,12 +227,12 @@ declare i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8>)
 define signext i8 @vreduce_smin_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8> %v)
@@ -245,12 +245,12 @@ declare i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8>)
 define signext i8 @vreduce_and_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8> %v)
@@ -263,12 +263,12 @@ declare i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8>)
 define signext i8 @vreduce_or_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> %v)
@@ -281,12 +281,12 @@ declare i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8>)
 define signext i8 @vreduce_xor_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> %v)
@@ -299,12 +299,12 @@ declare i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8>)
 define signext i8 @vreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %v)
@@ -317,12 +317,12 @@ declare i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8>)
 define signext i8 @vreduce_umax_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8> %v)
@@ -335,12 +335,12 @@ declare i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8>)
 define signext i8 @vreduce_smax_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8> %v)
@@ -353,12 +353,12 @@ declare i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8>)
 define signext i8 @vreduce_umin_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8> %v)
@@ -371,12 +371,12 @@ declare i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8>)
 define signext i8 @vreduce_smin_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %v)
@@ -389,12 +389,12 @@ declare i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8>)
 define signext i8 @vreduce_and_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
@@ -407,12 +407,12 @@ declare i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8>)
 define signext i8 @vreduce_or_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8> %v)
@@ -425,12 +425,12 @@ declare i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8>)
 define signext i8 @vreduce_xor_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
 ;
   %red = call i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8> %v)
@@ -443,12 +443,12 @@ declare i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16>)
 define signext i16 @vreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %v)
@@ -460,13 +460,13 @@ define signext i16 @vwreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
@@ -479,13 +479,13 @@ define signext i16 @vwreduce_uadd_nxv1i8(<vscale x 1 x i8> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv1i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv1i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
@@ -499,12 +499,12 @@ declare i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16>)
 define signext i16 @vreduce_umax_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16> %v)
@@ -517,12 +517,12 @@ declare i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16>)
 define signext i16 @vreduce_smax_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16> %v)
@@ -535,12 +535,12 @@ declare i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16>)
 define signext i16 @vreduce_umin_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16> %v)
@@ -553,12 +553,12 @@ declare i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16>)
 define signext i16 @vreduce_smin_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16> %v)
@@ -571,12 +571,12 @@ declare i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16>)
 define signext i16 @vreduce_and_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16> %v)
@@ -589,12 +589,12 @@ declare i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16>)
 define signext i16 @vreduce_or_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16> %v)
@@ -607,12 +607,12 @@ declare i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16>)
 define signext i16 @vreduce_xor_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16> %v)
@@ -625,12 +625,12 @@ declare i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16>)
 define signext i16 @vreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %v)
@@ -642,13 +642,13 @@ define signext i16 @vwreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
@@ -661,13 +661,13 @@ define signext i16 @vwreduce_uadd_nxv2i8(<vscale x 2 x i8> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv2i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
@@ -681,12 +681,12 @@ declare i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16>)
 define signext i16 @vreduce_umax_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16> %v)
@@ -699,12 +699,12 @@ declare i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16>)
 define signext i16 @vreduce_smax_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16> %v)
@@ -717,12 +717,12 @@ declare i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16>)
 define signext i16 @vreduce_umin_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16> %v)
@@ -735,12 +735,12 @@ declare i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16>)
 define signext i16 @vreduce_smin_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16> %v)
@@ -753,12 +753,12 @@ declare i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16>)
 define signext i16 @vreduce_and_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16> %v)
@@ -771,12 +771,12 @@ declare i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16>)
 define signext i16 @vreduce_or_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> %v)
@@ -789,12 +789,12 @@ declare i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16>)
 define signext i16 @vreduce_xor_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %v)
@@ -807,12 +807,12 @@ declare i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16>)
 define signext i16 @vreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %v)
@@ -824,13 +824,13 @@ define signext i16 @vwreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
@@ -843,13 +843,13 @@ define signext i16 @vwreduce_uadd_nxv4i8(<vscale x 4 x i8> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv4i8'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
@@ -863,12 +863,12 @@ declare i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16>)
 define signext i16 @vreduce_umax_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16> %v)
@@ -881,12 +881,12 @@ declare i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16>)
 define signext i16 @vreduce_smax_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16> %v)
@@ -899,12 +899,12 @@ declare i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16>)
 define signext i16 @vreduce_umin_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16> %v)
@@ -917,12 +917,12 @@ declare i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16>)
 define signext i16 @vreduce_smin_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16> %v)
@@ -935,12 +935,12 @@ declare i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16>)
 define signext i16 @vreduce_and_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16> %v)
@@ -953,12 +953,12 @@ declare i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16>)
 define signext i16 @vreduce_or_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16> %v)
@@ -971,12 +971,12 @@ declare i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16>)
 define signext i16 @vreduce_xor_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
 ;
   %red = call i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16> %v)
@@ -989,12 +989,12 @@ declare i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32>)
 define signext i32 @vreduce_add_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %v)
@@ -1006,13 +1006,13 @@ define signext i32 @vwreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i16> %v to <vscale x 1 x i32>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i16> %v to <vscale x 1 x i32>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = sext <vscale x 1 x i16> %v to <vscale x 1 x i32>
@@ -1025,13 +1025,13 @@ define signext i32 @vwreduce_uadd_nxv1i16(<vscale x 1 x i16> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv1i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 1 x i16> %v to <vscale x 1 x i32>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv1i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 1 x i16> %v to <vscale x 1 x i32>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = zext <vscale x 1 x i16> %v to <vscale x 1 x i32>
@@ -1045,12 +1045,12 @@ declare i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32>)
 define signext i32 @vreduce_umax_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32> %v)
@@ -1063,12 +1063,12 @@ declare i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32>)
 define signext i32 @vreduce_smax_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32> %v)
@@ -1081,12 +1081,12 @@ declare i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32>)
 define signext i32 @vreduce_umin_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32> %v)
@@ -1099,12 +1099,12 @@ declare i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32>)
 define signext i32 @vreduce_smin_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32> %v)
@@ -1117,12 +1117,12 @@ declare i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32>)
 define signext i32 @vreduce_and_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> %v)
@@ -1135,12 +1135,12 @@ declare i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32>)
 define signext i32 @vreduce_or_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> %v)
@@ -1153,12 +1153,12 @@ declare i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32>)
 define signext i32 @vreduce_xor_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> %v)
@@ -1171,12 +1171,12 @@ declare i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32>)
 define signext i32 @vreduce_add_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %v)
@@ -1188,13 +1188,13 @@ define signext i32 @vwreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i16> %v to <vscale x 2 x i32>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i16> %v to <vscale x 2 x i32>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = sext <vscale x 2 x i16> %v to <vscale x 2 x i32>
@@ -1207,13 +1207,13 @@ define signext i32 @vwreduce_uadd_nxv2i16(<vscale x 2 x i16> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 2 x i16> %v to <vscale x 2 x i32>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv2i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 2 x i16> %v to <vscale x 2 x i32>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = zext <vscale x 2 x i16> %v to <vscale x 2 x i32>
@@ -1227,12 +1227,12 @@ declare i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32>)
 define signext i32 @vreduce_umax_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32> %v)
@@ -1245,12 +1245,12 @@ declare i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32>)
 define signext i32 @vreduce_smax_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32> %v)
@@ -1263,12 +1263,12 @@ declare i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32>)
 define signext i32 @vreduce_umin_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %v)
@@ -1281,12 +1281,12 @@ declare i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32>)
 define signext i32 @vreduce_smin_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32> %v)
@@ -1299,12 +1299,12 @@ declare i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32>)
 define signext i32 @vreduce_and_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32> %v)
@@ -1317,12 +1317,12 @@ declare i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32>)
 define signext i32 @vreduce_or_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %v)
@@ -1335,12 +1335,12 @@ declare i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32>)
 define signext i32 @vreduce_xor_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32> %v)
@@ -1353,12 +1353,12 @@ declare i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
@@ -1370,13 +1370,13 @@ define signext i32 @vwreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = sext <vscale x 4 x i16> %v to <vscale x 4 x i32>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i16> %v to <vscale x 4 x i32>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = sext <vscale x 4 x i16> %v to <vscale x 4 x i32>
@@ -1389,13 +1389,13 @@ define signext i32 @vwreduce_uadd_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = zext <vscale x 4 x i16> %v to <vscale x 4 x i32>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv4i16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 4 x i16> %v to <vscale x 4 x i32>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %e = zext <vscale x 4 x i16> %v to <vscale x 4 x i32>
@@ -1409,12 +1409,12 @@ declare i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_umax_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
@@ -1427,12 +1427,12 @@ declare i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_smax_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
@@ -1445,12 +1445,12 @@ declare i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_umin_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
@@ -1463,12 +1463,12 @@ declare i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_smin_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
@@ -1481,12 +1481,12 @@ declare i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_and_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
@@ -1499,12 +1499,12 @@ declare i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_or_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
@@ -1517,12 +1517,12 @@ declare i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_xor_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
 ;
   %red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
@@ -1535,12 +1535,12 @@ declare i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64>)
 define i64 @vreduce_add_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv1i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv1i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %v)
@@ -1552,13 +1552,13 @@ define i64 @vwreduce_add_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i32> %v to <vscale x 1 x i64>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i32> %v to <vscale x 1 x i64>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = sext <vscale x 1 x i32> %v to <vscale x 1 x i64>
@@ -1571,13 +1571,13 @@ define i64 @vwreduce_uadd_nxv1i32(<vscale x 1 x i32> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv1i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 1 x i32> %v to <vscale x 1 x i64>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv1i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 1 x i32> %v to <vscale x 1 x i64>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = zext <vscale x 1 x i32> %v to <vscale x 1 x i64>
@@ -1591,12 +1591,12 @@ declare i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64>)
 define i64 @vreduce_umax_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv1i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv1i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> %v)
@@ -1609,12 +1609,12 @@ declare i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64>)
 define i64 @vreduce_smax_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv1i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv1i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> %v)
@@ -1627,12 +1627,12 @@ declare i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64>)
 define i64 @vreduce_umin_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv1i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv1i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> %v)
@@ -1645,12 +1645,12 @@ declare i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64>)
 define i64 @vreduce_smin_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv1i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv1i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> %v)
@@ -1663,12 +1663,12 @@ declare i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64>)
 define i64 @vreduce_and_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv1i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv1i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64> %v)
@@ -1681,12 +1681,12 @@ declare i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64>)
 define i64 @vreduce_or_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv1i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv1i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64> %v)
@@ -1699,12 +1699,12 @@ declare i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64>)
 define i64 @vreduce_xor_nxv1i64(<vscale x 1 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv1i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv1i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64> %v)
@@ -1717,12 +1717,12 @@ declare i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64>)
 define i64 @vreduce_add_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv2i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv2i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %v)
@@ -1734,13 +1734,13 @@ define i64 @vwreduce_add_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
@@ -1753,13 +1753,13 @@ define i64 @vwreduce_uadd_nxv2i32(<vscale x 2 x i32> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv2i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
@@ -1773,12 +1773,12 @@ declare i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64>)
 define i64 @vreduce_umax_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv2i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv2i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %v)
@@ -1791,12 +1791,12 @@ declare i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64>)
 define i64 @vreduce_smax_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv2i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv2i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %v)
@@ -1809,12 +1809,12 @@ declare i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64>)
 define i64 @vreduce_umin_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv2i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv2i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %v)
@@ -1827,12 +1827,12 @@ declare i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64>)
 define i64 @vreduce_smin_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv2i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv2i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %v)
@@ -1845,12 +1845,12 @@ declare i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64>)
 define i64 @vreduce_and_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv2i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv2i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %v)
@@ -1863,12 +1863,12 @@ declare i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64>)
 define i64 @vreduce_or_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv2i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv2i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %v)
@@ -1881,12 +1881,12 @@ declare i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64>)
 define i64 @vreduce_xor_nxv2i64(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv2i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv2i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %v)
@@ -1899,12 +1899,12 @@ declare i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64>)
 define i64 @vreduce_add_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_add_nxv4i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_add_nxv4i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
@@ -1916,13 +1916,13 @@ define i64 @vwreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vwreduce_add_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %e = sext <vscale x 4 x i32> %v to <vscale x 4 x i64>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vwreduce_add_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i32> %v to <vscale x 4 x i64>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = sext <vscale x 4 x i32> %v to <vscale x 4 x i64>
@@ -1935,13 +1935,13 @@ define i64 @vwreduce_uadd_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: 'vwreduce_uadd_nxv4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %e = zext <vscale x 4 x i32> %v to <vscale x 4 x i64>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vwreduce_uadd_nxv4i32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 4 x i32> %v to <vscale x 4 x i64>
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %e = zext <vscale x 4 x i32> %v to <vscale x 4 x i64>
@@ -1955,12 +1955,12 @@ declare i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64>)
 define i64 @vreduce_umax_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_umax_nxv4i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_umax_nxv4i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v)
@@ -1973,12 +1973,12 @@ declare i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64>)
 define i64 @vreduce_smax_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_smax_nxv4i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_smax_nxv4i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v)
@@ -1991,12 +1991,12 @@ declare i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64>)
 define i64 @vreduce_umin_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_umin_nxv4i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_umin_nxv4i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v)
@@ -2009,12 +2009,12 @@ declare i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64>)
 define i64 @vreduce_smin_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_smin_nxv4i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_smin_nxv4i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v)
@@ -2027,12 +2027,12 @@ declare i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64>)
 define i64 @vreduce_and_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_and_nxv4i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_and_nxv4i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
@@ -2045,12 +2045,12 @@ declare i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64>)
 define i64 @vreduce_or_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_or_nxv4i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_or_nxv4i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
@@ -2063,12 +2063,12 @@ declare i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64>)
 define i64 @vreduce_xor_nxv4i64(<vscale x 4 x i64> %v) {
 ; CHECK-LABEL: 'vreduce_xor_nxv4i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
 ;
 ; SIZE-LABEL: 'vreduce_xor_nxv4i64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)
-; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
 ;
   %red = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll
index 228fa602be0bdb..1ea5bcdf8ef9d9 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll
@@ -16,14 +16,14 @@ define i32 @reduce_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.xor.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.xor.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.xor.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.xor.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.xor.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.xor.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.xor.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.xor.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.xor.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.xor.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.xor.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.xor.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.xor.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.xor.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.xor.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.xor.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i1'
@@ -35,14 +35,14 @@ define i32 @reduce_i1(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.xor.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.xor.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.xor.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.xor.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.xor.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.xor.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.xor.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.xor.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.xor.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.xor.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.xor.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.xor.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.xor.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.xor.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.xor.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.xor.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
@@ -75,14 +75,14 @@ define i32 @reduce_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.xor.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.xor.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.xor.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.xor.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.xor.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.xor.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.xor.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.xor.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i8 @llvm.vp.reduce.xor.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.xor.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i8 @llvm.vp.reduce.xor.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i8 @llvm.vp.reduce.xor.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.xor.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.xor.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.xor.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128_vp = call i8 @llvm.vp.reduce.xor.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i8'
@@ -94,14 +94,14 @@ define i32 @reduce_i8(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.xor.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.xor.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.xor.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.xor.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.xor.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.xor.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.xor.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.xor.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i8 @llvm.vp.reduce.xor.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.xor.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i8 @llvm.vp.reduce.xor.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i8 @llvm.vp.reduce.xor.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.xor.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.xor.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 @llvm.vp.reduce.xor.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i8 @llvm.vp.reduce.xor.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.xor.v1i8(<1 x i8> undef)
@@ -134,14 +134,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.xor.v128i16(<128 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.xor.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.xor.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.xor.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.xor.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.xor.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.xor.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.xor.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.xor.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.xor.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.xor.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.xor.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i16 @llvm.vp.reduce.xor.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i16 @llvm.vp.reduce.xor.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i16 @llvm.vp.reduce.xor.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i16 @llvm.vp.reduce.xor.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call i16 @llvm.vp.reduce.xor.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i16'
@@ -153,14 +153,14 @@ define i32 @reduce_i16(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.xor.v128i16(<128 x i16> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.xor.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.xor.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.xor.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.xor.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.xor.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.xor.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.xor.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.xor.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.xor.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.xor.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.xor.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i16 @llvm.vp.reduce.xor.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i16 @llvm.vp.reduce.xor.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i16 @llvm.vp.reduce.xor.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i16 @llvm.vp.reduce.xor.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call i16 @llvm.vp.reduce.xor.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i16 @llvm.vector.reduce.xor.v1i16(<1 x i16> undef)
@@ -193,14 +193,14 @@ define i32 @reduce_i32(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.xor.v64i32(<64 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.xor.v128i32(<128 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.xor.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.xor.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.xor.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.xor.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.xor.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.xor.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.xor.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.xor.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i32 @llvm.vp.reduce.xor.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.xor.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i32 @llvm.vp.reduce.xor.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i32 @llvm.vp.reduce.xor.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i32 @llvm.vp.reduce.xor.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i32 @llvm.vp.reduce.xor.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call i32 @llvm.vp.reduce.xor.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call i32 @llvm.vp.reduce.xor.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i32'
@@ -212,14 +212,14 @@ define i32 @reduce_i32(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.xor.v64i32(<64 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.xor.v128i32(<128 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.xor.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.xor.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.xor.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.xor.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.xor.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.xor.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.xor.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.xor.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i32 @llvm.vp.reduce.xor.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.xor.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i32 @llvm.vp.reduce.xor.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i32 @llvm.vp.reduce.xor.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i32 @llvm.vp.reduce.xor.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i32 @llvm.vp.reduce.xor.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call i32 @llvm.vp.reduce.xor.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call i32 @llvm.vp.reduce.xor.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i32 @llvm.vector.reduce.xor.v1i32(<1 x i32> undef)
@@ -243,6 +243,44 @@ define i32 @reduce_i32(i32 %arg) {
 }
 
 define i32 @reduce_i64(i32 %arg) {
+; CHECK-LABEL: 'reduce_i64'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.xor.v32i64(<32 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.xor.v64i64(<64 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.xor.v128i64(<128 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i64 @llvm.vp.reduce.xor.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.xor.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i64 @llvm.vp.reduce.xor.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i64 @llvm.vp.reduce.xor.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i64 @llvm.vp.reduce.xor.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32_vp = call i64 @llvm.vp.reduce.xor.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call i64 @llvm.vp.reduce.xor.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call i64 @llvm.vp.reduce.xor.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'reduce_i64'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.xor.v32i64(<32 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.xor.v64i64(<64 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.xor.v128i64(<128 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i64 @llvm.vp.reduce.xor.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.xor.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i64 @llvm.vp.reduce.xor.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i64 @llvm.vp.reduce.xor.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i64 @llvm.vp.reduce.xor.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32_vp = call i64 @llvm.vp.reduce.xor.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call i64 @llvm.vp.reduce.xor.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call i64 @llvm.vp.reduce.xor.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
   %V1   = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
   %V2   = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
   %V4   = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll
index bb98508f239c1b..869e51966e092a 100644
--- a/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll
@@ -1218,37 +1218,37 @@ define void @reduce_add() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; TYPEBASED-LABEL: 'reduce_add'
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %3 = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %5 = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %5 = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %6 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %7 = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %7 = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %9 = call i64 @llvm.vp.reduce.add.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %9 = call i64 @llvm.vp.reduce.add.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %10 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %11 = call i64 @llvm.vp.reduce.add.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %11 = call i64 @llvm.vp.reduce.add.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %12 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %13 = call i64 @llvm.vp.reduce.add.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %13 = call i64 @llvm.vp.reduce.add.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %14 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %15 = call i64 @llvm.vp.reduce.add.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %15 = call i64 @llvm.vp.reduce.add.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %17 = call i8 @llvm.vp.reduce.add.nxv8i8(i8 undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %17 = call i8 @llvm.vp.reduce.add.nxv8i8(i8 undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %19 = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %19 = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %20 = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %21 = call i8 @llvm.vp.reduce.add.nxv8i8(i8 undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %21 = call i8 @llvm.vp.reduce.add.nxv8i8(i8 undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %22 = call i8 @llvm.vector.reduce.add.nxv8i8(<vscale x 8 x i8> undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %23 = call i8 @llvm.vp.reduce.add.nxv16i8(i8 undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %23 = call i8 @llvm.vp.reduce.add.nxv16i8(i8 undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %24 = call i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8> undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %25 = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %25 = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %26 = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %27 = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %27 = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %28 = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %29 = call i64 @llvm.vp.reduce.add.nxv8i64(i64 undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %29 = call i64 @llvm.vp.reduce.add.nxv8i64(i64 undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %30 = call i64 @llvm.vector.reduce.add.nxv8i64(<vscale x 8 x i64> undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %31 = call i64 @llvm.vp.reduce.add.nxv16i64(i64 undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %31 = call i64 @llvm.vp.reduce.add.nxv16i64(i64 undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %32 = call i64 @llvm.vector.reduce.add.nxv16i64(<vscale x 16 x i64> undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
@@ -1324,37 +1324,37 @@ define void @reduce_fadd() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; TYPEBASED-LABEL: 'reduce_fadd'
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = call float @llvm.vp.reduce.fadd.v2f32(float undef, <2 x float> undef, <2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = call float @llvm.vp.reduce.fadd.v2f32(float undef, <2 x float> undef, <2 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = call float @llvm.vector.reduce.fadd.v2f32(float undef, <2 x float> undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %3 = call float @llvm.vp.reduce.fadd.v4f32(float undef, <4 x float> undef, <4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %3 = call float @llvm.vp.reduce.fadd.v4f32(float undef, <4 x float> undef, <4 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %4 = call float @llvm.vector.reduce.fadd.v4f32(float undef, <4 x float> undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %5 = call float @llvm.vp.reduce.fadd.v8f32(float undef, <8 x float> undef, <8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %5 = call float @llvm.vp.reduce.fadd.v8f32(float undef, <8 x float> undef, <8 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %6 = call float @llvm.vector.reduce.fadd.v8f32(float undef, <8 x float> undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %7 = call float @llvm.vp.reduce.fadd.v16f32(float undef, <16 x float> undef, <16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %7 = call float @llvm.vp.reduce.fadd.v16f32(float undef, <16 x float> undef, <16 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %8 = call float @llvm.vector.reduce.fadd.v16f32(float undef, <16 x float> undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %9 = call double @llvm.vp.reduce.fadd.v2f64(double undef, <2 x double> undef, <2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = call double @llvm.vp.reduce.fadd.v2f64(double undef, <2 x double> undef, <2 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %10 = call double @llvm.vector.reduce.fadd.v2f64(double undef, <2 x double> undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %11 = call double @llvm.vp.reduce.fadd.v4f64(double undef, <4 x double> undef, <4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %11 = call double @llvm.vp.reduce.fadd.v4f64(double undef, <4 x double> undef, <4 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %12 = call double @llvm.vector.reduce.fadd.v4f64(double undef, <4 x double> undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %13 = call double @llvm.vp.reduce.fadd.v8f64(double undef, <8 x double> undef, <8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %13 = call double @llvm.vp.reduce.fadd.v8f64(double undef, <8 x double> undef, <8 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %14 = call double @llvm.vector.reduce.fadd.v8f64(double undef, <8 x double> undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %15 = call double @llvm.vp.reduce.fadd.v16f64(double undef, <16 x double> undef, <16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %15 = call double @llvm.vp.reduce.fadd.v16f64(double undef, <16 x double> undef, <16 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %16 = call double @llvm.vector.reduce.fadd.v16f64(double undef, <16 x double> undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %17 = call float @llvm.vp.reduce.fadd.nxv2f32(float undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %17 = call float @llvm.vp.reduce.fadd.nxv2f32(float undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %18 = call float @llvm.vector.reduce.fadd.nxv2f32(float undef, <vscale x 2 x float> undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %19 = call float @llvm.vp.reduce.fadd.nxv4f32(float undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %19 = call float @llvm.vp.reduce.fadd.nxv4f32(float undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %20 = call float @llvm.vector.reduce.fadd.nxv4f32(float undef, <vscale x 4 x float> undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %21 = call float @llvm.vp.reduce.fadd.nxv8f32(float undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %21 = call float @llvm.vp.reduce.fadd.nxv8f32(float undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %22 = call float @llvm.vector.reduce.fadd.nxv8f32(float undef, <vscale x 8 x float> undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %23 = call float @llvm.vp.reduce.fadd.nxv16f32(float undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %23 = call float @llvm.vp.reduce.fadd.nxv16f32(float undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %24 = call float @llvm.vector.reduce.fadd.nxv16f32(float undef, <vscale x 16 x float> undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %25 = call double @llvm.vp.reduce.fadd.nxv2f64(double undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %25 = call double @llvm.vp.reduce.fadd.nxv2f64(double undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %26 = call double @llvm.vector.reduce.fadd.nxv2f64(double undef, <vscale x 2 x double> undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %27 = call double @llvm.vp.reduce.fadd.nxv4f64(double undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %27 = call double @llvm.vp.reduce.fadd.nxv4f64(double undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %28 = call double @llvm.vector.reduce.fadd.nxv4f64(double undef, <vscale x 4 x double> undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %29 = call double @llvm.vp.reduce.fadd.nxv8f64(double undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %29 = call double @llvm.vp.reduce.fadd.nxv8f64(double undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %30 = call double @llvm.vector.reduce.fadd.nxv8f64(double undef, <vscale x 8 x double> undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %31 = call double @llvm.vp.reduce.fadd.nxv16f64(double undef, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %31 = call double @llvm.vp.reduce.fadd.nxv16f64(double undef, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %32 = call double @llvm.vector.reduce.fadd.nxv16f64(double undef, <vscale x 16 x double> undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;

>From ba2534fe8db5c2ea25de335aab2d72372fb59e70 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Tue, 29 Oct 2024 21:58:43 -0700
Subject: [PATCH 3/7] Fix missing tests

---
 .../Target/RISCV/RISCVTargetTransformInfo.cpp |   4 +
 .../Analysis/CostModel/RISCV/reduce-and.ll    |   3 +
 .../CostModel/RISCV/reduce-fmaximum.ll        | 112 +++++++-------
 .../CostModel/RISCV/reduce-fminimum.ll        |  56 +++----
 .../Analysis/CostModel/RISCV/reduce-max.ll    |   3 +
 .../CostModel/RISCV/reduce-scalable-fp.ll     | 144 +++++++++---------
 6 files changed, 166 insertions(+), 156 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 9df72f201befc8..dc7165bf1cc364 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1249,10 +1249,14 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
       IID = Intrinsic::umin;
       break;
     case Intrinsic::vp_reduce_fmax:
+      IID = Intrinsic::maxnum;
+      break;
     case Intrinsic::vp_reduce_fmaximum:
       IID = Intrinsic::maximum;
       break;
     case Intrinsic::vp_reduce_fmin:
+      IID = Intrinsic::minnum;
+      break;
     case Intrinsic::vp_reduce_fminimum:
       IID = Intrinsic::minimum;
       break;
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
index 76f2bd949c652c..47ae0bfa58cf9a 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
@@ -100,6 +100,7 @@ define i32 @reduce_i8(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.and.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.and.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.and.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128_vp = call i8 @llvm.vp.reduce.and.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'reduce_i8'
@@ -118,6 +119,7 @@ define i32 @reduce_i8(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.and.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.and.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 @llvm.vp.reduce.and.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i8 @llvm.vp.reduce.and.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> undef)
@@ -136,6 +138,7 @@ define i32 @reduce_i8(i32 %arg) {
   %V16_vp  = call i8 @llvm.vp.reduce.and.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
   %V32_vp  = call i8 @llvm.vp.reduce.and.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
   %V64_vp  = call i8 @llvm.vp.reduce.and.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+  %V128_vp  = call i8 @llvm.vp.reduce.and.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
   ret i32 undef
 }
 
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
index 6e33ccaaed5c7e..c28fdfbfddac1a 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
@@ -36,20 +36,20 @@ define float @reduce_fmaximum_f32(float %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %12 = call fast float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %13 = call fast float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %14 = call fast float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %15 = call float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %17 = call float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %18 = call float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %19 = call float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %20 = call float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %21 = call float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %22 = call fast float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %23 = call fast float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %24 = call fast float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %25 = call fast float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %26 = call fast float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %27 = call fast float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %28 = call fast float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %15 = call float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %16 = call float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %17 = call float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %18 = call float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %19 = call float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %20 = call float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %21 = call float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %22 = call fast float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %23 = call fast float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %24 = call fast float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %25 = call fast float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %26 = call fast float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %27 = call fast float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %28 = call fast float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float undef
 ;
 ; SIZE-LABEL: 'reduce_fmaximum_f32'
@@ -81,20 +81,20 @@ define float @reduce_fmaximum_f32(float %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %12 = call fast float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call fast float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = call fast float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %15 = call float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %17 = call float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %18 = call float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %19 = call float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %20 = call float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %21 = call float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %22 = call fast float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %23 = call fast float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %24 = call fast float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %25 = call fast float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %26 = call fast float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %27 = call fast float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = call fast float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %15 = call float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %16 = call float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %17 = call float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %18 = call float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %19 = call float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %20 = call float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %21 = call float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %22 = call fast float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %23 = call fast float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %24 = call fast float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %25 = call fast float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %26 = call fast float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %27 = call fast float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %28 = call fast float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float undef
 ;
 %V2   = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
@@ -179,20 +179,20 @@ define double @reduce_fmaximum_f64(double %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %18 = call fast double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %19 = call fast double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %20 = call fast double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %21 = call double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %22 = call double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %23 = call double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %24 = call double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %25 = call double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %26 = call double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %27 = call double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = call fast double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %29 = call fast double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %30 = call fast double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %31 = call fast double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %32 = call fast double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %33 = call fast double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %34 = call fast double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %21 = call double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %22 = call double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %23 = call double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %24 = call double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %25 = call double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %26 = call double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %27 = call double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %28 = call fast double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %29 = call fast double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %30 = call fast double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %31 = call fast double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %32 = call fast double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %33 = call fast double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %34 = call fast double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double undef
 ;
 ; SIZE-LABEL: 'reduce_fmaximum_f64'
@@ -222,20 +222,20 @@ define double @reduce_fmaximum_f64(double %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = call fast double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %19 = call fast double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %20 = call fast double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %21 = call double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %22 = call double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %23 = call double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %24 = call double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %25 = call double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %26 = call double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %27 = call double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = call fast double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %29 = call fast double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %30 = call fast double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %31 = call fast double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %32 = call fast double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %33 = call fast double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %34 = call fast double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %21 = call double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %22 = call double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %23 = call double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %24 = call double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %25 = call double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %26 = call double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %27 = call double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %28 = call fast double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %29 = call fast double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %30 = call fast double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %31 = call fast double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %32 = call fast double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %33 = call fast double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %34 = call fast double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double undef
 ;
 %V2   = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
index d8fd6393039282..99c9f5d89f6963 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
@@ -22,13 +22,13 @@ define float @reduce_fmaximum_f32(float %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32_vp = call float @llvm.vp.reduce.fminimum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V64_vp = call float @llvm.vp.reduce.fminimum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V128_vp = call float @llvm.vp.reduce.fminimum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %1 = call float @llvm.vp.reduce.fmin.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call float @llvm.vp.reduce.fmin.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %3 = call float @llvm.vp.reduce.fmin.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %4 = call float @llvm.vp.reduce.fmin.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %5 = call float @llvm.vp.reduce.fmin.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %6 = call float @llvm.vp.reduce.fmin.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %7 = call float @llvm.vp.reduce.fmin.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = call float @llvm.vp.reduce.fmin.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = call float @llvm.vp.reduce.fmin.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %3 = call float @llvm.vp.reduce.fmin.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %4 = call float @llvm.vp.reduce.fmin.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call float @llvm.vp.reduce.fmin.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %6 = call float @llvm.vp.reduce.fmin.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %7 = call float @llvm.vp.reduce.fmin.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float undef
 ;
 ; SIZE-LABEL: 'reduce_fmaximum_f32'
@@ -46,13 +46,13 @@ define float @reduce_fmaximum_f32(float %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call float @llvm.vp.reduce.fminimum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64_vp = call float @llvm.vp.reduce.fminimum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V128_vp = call float @llvm.vp.reduce.fminimum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call float @llvm.vp.reduce.fmin.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call float @llvm.vp.reduce.fmin.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call float @llvm.vp.reduce.fmin.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call float @llvm.vp.reduce.fmin.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call float @llvm.vp.reduce.fmin.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call float @llvm.vp.reduce.fmin.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call float @llvm.vp.reduce.fmin.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = call float @llvm.vp.reduce.fmin.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = call float @llvm.vp.reduce.fmin.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %3 = call float @llvm.vp.reduce.fmin.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %4 = call float @llvm.vp.reduce.fmin.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %5 = call float @llvm.vp.reduce.fmin.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %6 = call float @llvm.vp.reduce.fmin.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %7 = call float @llvm.vp.reduce.fmin.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float undef
 ;
 %V2   = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
@@ -103,13 +103,13 @@ define double @reduce_fmaximum_f64(double %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32_vp = call double @llvm.vp.reduce.fminimum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64_vp = call double @llvm.vp.reduce.fminimum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call double @llvm.vp.reduce.fminimum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %1 = call double @llvm.vp.reduce.fmin.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %2 = call double @llvm.vp.reduce.fmin.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %3 = call double @llvm.vp.reduce.fmin.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %4 = call double @llvm.vp.reduce.fmin.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = call double @llvm.vp.reduce.fmin.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = call double @llvm.vp.reduce.fmin.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %7 = call double @llvm.vp.reduce.fmin.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = call double @llvm.vp.reduce.fmin.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = call double @llvm.vp.reduce.fmin.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %3 = call double @llvm.vp.reduce.fmin.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %4 = call double @llvm.vp.reduce.fmin.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %5 = call double @llvm.vp.reduce.fmin.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %6 = call double @llvm.vp.reduce.fmin.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %7 = call double @llvm.vp.reduce.fmin.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double undef
 ;
 ; SIZE-LABEL: 'reduce_fmaximum_f64'
@@ -126,13 +126,13 @@ define double @reduce_fmaximum_f64(double %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call double @llvm.vp.reduce.fminimum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64_vp = call double @llvm.vp.reduce.fminimum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V128_vp = call double @llvm.vp.reduce.fminimum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call double @llvm.vp.reduce.fmin.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call double @llvm.vp.reduce.fmin.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call double @llvm.vp.reduce.fmin.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call double @llvm.vp.reduce.fmin.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call double @llvm.vp.reduce.fmin.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call double @llvm.vp.reduce.fmin.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call double @llvm.vp.reduce.fmin.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = call double @llvm.vp.reduce.fmin.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = call double @llvm.vp.reduce.fmin.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %3 = call double @llvm.vp.reduce.fmin.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %4 = call double @llvm.vp.reduce.fmin.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = call double @llvm.vp.reduce.fmin.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %6 = call double @llvm.vp.reduce.fmin.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %7 = call double @llvm.vp.reduce.fmin.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double undef
 ;
 %V2   = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
index f77e94cd333aa5..e0f98a759cc89b 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
@@ -133,6 +133,7 @@ define i32 @reduce_umax_i16(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
@@ -151,6 +152,7 @@ define i32 @reduce_umax_i16(i32 %arg) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
@@ -168,6 +170,7 @@ define i32 @reduce_umax_i16(i32 %arg) {
   %V16  = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
   %V32  = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
   %V64  = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
+  %V128  = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> undef)
 
   %V1_vp   = call i16 @llvm.vp.reduce.umax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
   %V2_vp   = call i16 @llvm.vp.reduce.umax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
index 134f75d6b4692d..74dbcfae93f858 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
@@ -545,12 +545,12 @@ declare half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half>)
 define half @vreduce_fmin_nxv1f16(<vscale x 1 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
@@ -561,12 +561,12 @@ define half @vreduce_fmin_nxv1f16(<vscale x 1 x half> %v) {
 define half @vreduce_fmin_nxv1f16_nonans(<vscale x 1 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f16_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f16_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
@@ -577,12 +577,12 @@ define half @vreduce_fmin_nxv1f16_nonans(<vscale x 1 x half> %v) {
 define half @vreduce_fmin_nxv1f16_nonans_noinfs(<vscale x 1 x half> %v) #1 {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f16_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f16_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
@@ -595,12 +595,12 @@ declare half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half>)
 define half @vreduce_fmin_nxv2f16(<vscale x 2 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv2f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv2f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %v)
@@ -613,12 +613,12 @@ declare half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half>)
 define half @vreduce_fmin_nxv4f16(<vscale x 4 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv4f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv4f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %v)
@@ -631,12 +631,12 @@ declare half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half>)
 define half @vreduce_fmin_nxv64f16(<vscale x 64 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv64f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv64f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
@@ -649,12 +649,12 @@ declare float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float>)
 define float @vreduce_fmin_nxv1f32(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
@@ -665,12 +665,12 @@ define float @vreduce_fmin_nxv1f32(<vscale x 1 x float> %v) {
 define float @vreduce_fmin_nxv1f32_nonans(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f32_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f32_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
@@ -681,12 +681,12 @@ define float @vreduce_fmin_nxv1f32_nonans(<vscale x 1 x float> %v) {
 define float @vreduce_fmin_nxv1f32_nonans_noinfs(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f32_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f32_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
@@ -699,12 +699,12 @@ declare float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float>)
 define float @vreduce_fmin_nxv2f32(<vscale x 2 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv2f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv2f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %v)
@@ -717,12 +717,12 @@ declare float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float>)
 define float @vreduce_fmin_nxv4f32(<vscale x 4 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv4f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv4f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
@@ -735,12 +735,12 @@ declare float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float>)
 define float @vreduce_fmin_nxv32f32(<vscale x 32 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv32f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv32f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
@@ -753,12 +753,12 @@ declare double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double>)
 define double @vreduce_fmin_nxv1f64(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
@@ -769,12 +769,12 @@ define double @vreduce_fmin_nxv1f64(<vscale x 1 x double> %v) {
 define double @vreduce_fmin_nxv1f64_nonans(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f64_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f64_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
@@ -785,12 +785,12 @@ define double @vreduce_fmin_nxv1f64_nonans(<vscale x 1 x double> %v) {
 define double @vreduce_fmin_nxv1f64_nonans_noinfs(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv1f64_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv1f64_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
@@ -803,12 +803,12 @@ declare double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double>)
 define double @vreduce_fmin_nxv2f64(<vscale x 2 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv2f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv2f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %v)
@@ -821,12 +821,12 @@ declare double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double>)
 define double @vreduce_fmin_nxv4f64(<vscale x 4 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv4f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv4f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
@@ -839,12 +839,12 @@ declare double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double>)
 define double @vreduce_fmin_nxv16f64(<vscale x 16 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmin_nxv16f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmin_nxv16f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
@@ -857,12 +857,12 @@ declare half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half>)
 define half @vreduce_fmax_nxv1f16(<vscale x 1 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
@@ -873,12 +873,12 @@ define half @vreduce_fmax_nxv1f16(<vscale x 1 x half> %v) {
 define half @vreduce_fmax_nxv1f16_nonans(<vscale x 1 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f16_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f16_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
@@ -889,12 +889,12 @@ define half @vreduce_fmax_nxv1f16_nonans(<vscale x 1 x half> %v) {
 define half @vreduce_fmax_nxv1f16_nonans_noinfs(<vscale x 1 x half> %v) #1 {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f16_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f16_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
@@ -907,12 +907,12 @@ declare half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half>)
 define half @vreduce_fmax_nxv2f16(<vscale x 2 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv2f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv2f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %v)
@@ -925,12 +925,12 @@ declare half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half>)
 define half @vreduce_fmax_nxv4f16(<vscale x 4 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv4f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv4f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %v)
@@ -943,12 +943,12 @@ declare half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half>)
 define half @vreduce_fmax_nxv64f16(<vscale x 64 x half> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv64f16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv64f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret half %red
 ;
   %red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
@@ -961,12 +961,12 @@ declare float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float>)
 define float @vreduce_fmax_nxv1f32(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
@@ -977,12 +977,12 @@ define float @vreduce_fmax_nxv1f32(<vscale x 1 x float> %v) {
 define float @vreduce_fmax_nxv1f32_nonans(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f32_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f32_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
@@ -993,12 +993,12 @@ define float @vreduce_fmax_nxv1f32_nonans(<vscale x 1 x float> %v) {
 define float @vreduce_fmax_nxv1f32_nonans_noinfs(<vscale x 1 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f32_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f32_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
@@ -1011,12 +1011,12 @@ declare float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float>)
 define float @vreduce_fmax_nxv2f32(<vscale x 2 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv2f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv2f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %v)
@@ -1029,12 +1029,12 @@ declare float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float>)
 define float @vreduce_fmax_nxv4f32(<vscale x 4 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv4f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv4f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
@@ -1047,12 +1047,12 @@ declare float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float>)
 define float @vreduce_fmax_nxv32f32(<vscale x 32 x float> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv32f32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv32f32'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float %red
 ;
   %red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
@@ -1065,12 +1065,12 @@ declare double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double>)
 define double @vreduce_fmax_nxv1f64(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
@@ -1081,12 +1081,12 @@ define double @vreduce_fmax_nxv1f64(<vscale x 1 x double> %v) {
 define double @vreduce_fmax_nxv1f64_nonans(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f64_nonans'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f64_nonans'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
@@ -1097,12 +1097,12 @@ define double @vreduce_fmax_nxv1f64_nonans(<vscale x 1 x double> %v) {
 define double @vreduce_fmax_nxv1f64_nonans_noinfs(<vscale x 1 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv1f64_nonans_noinfs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv1f64_nonans_noinfs'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
@@ -1115,12 +1115,12 @@ declare double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double>)
 define double @vreduce_fmax_nxv2f64(<vscale x 2 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv2f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv2f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %v)
@@ -1133,12 +1133,12 @@ declare double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double>)
 define double @vreduce_fmax_nxv4f64(<vscale x 4 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv4f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv4f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
@@ -1151,12 +1151,12 @@ declare double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double>)
 define double @vreduce_fmax_nxv16f64(<vscale x 16 x double> %v) {
 ; CHECK-LABEL: 'vreduce_fmax_nxv16f64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
 ; SIZE-LABEL: 'vreduce_fmax_nxv16f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %red
 ;
   %red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)

>From 3cb1b3de68e4089ea7f6b9e696368137a12a9aec Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Wed, 30 Oct 2024 09:38:37 -0700
Subject: [PATCH 4/7] Fixup! early return

---
 .../Target/RISCV/RISCVTargetTransformInfo.cpp | 103 ++++++++----------
 1 file changed, 45 insertions(+), 58 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index dc7165bf1cc364..bdb8d031fad31f 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1192,78 +1192,65 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                               ICA.getArgTypes()[0], CmpInst::BAD_ICMP_PREDICATE,
                               CostKind);
   case Intrinsic::vp_reduce_add:
+    return getArithmeticReductionCost(Instruction::Add,
+                                      cast<VectorType>(ICA.getArgTypes()[1]),
+                                      std::nullopt, CostKind);
   case Intrinsic::vp_reduce_fadd:
+    return getArithmeticReductionCost(Instruction::FAdd,
+                                      cast<VectorType>(ICA.getArgTypes()[1]),
+                                      ICA.getFlags(), CostKind);
   case Intrinsic::vp_reduce_mul:
+    return getArithmeticReductionCost(Instruction::Mul,
+                                      cast<VectorType>(ICA.getArgTypes()[1]),
+                                      std::nullopt, CostKind);
   case Intrinsic::vp_reduce_fmul:
+    return getArithmeticReductionCost(Instruction::FMul,
+                                      cast<VectorType>(ICA.getArgTypes()[1]),
+                                      ICA.getFlags(), CostKind);
   case Intrinsic::vp_reduce_and:
+    return getArithmeticReductionCost(Instruction::And,
+                                      cast<VectorType>(ICA.getArgTypes()[1]),
+                                      std::nullopt, CostKind);
   case Intrinsic::vp_reduce_or:
-  case Intrinsic::vp_reduce_xor: {
-    unsigned Opcode;
-    switch (ICA.getID()) {
-    case Intrinsic::vp_reduce_add:
-      Opcode = Instruction::Add;
-      break;
-    case Intrinsic::vp_reduce_fadd:
-      Opcode = Instruction::FAdd;
-      break;
-    case Intrinsic::vp_reduce_mul:
-      Opcode = Instruction::Mul;
-      break;
-    case Intrinsic::vp_reduce_fmul:
-      Opcode = Instruction::FMul;
-      break;
-    case Intrinsic::vp_reduce_and:
-      Opcode = Instruction::And;
-      break;
-    case Intrinsic::vp_reduce_or:
-      Opcode = Instruction::Or;
-      break;
-    case Intrinsic::vp_reduce_xor:
-      Opcode = Instruction::Xor;
-      break;
-    }
-    return getArithmeticReductionCost(Opcode,
+    return getArithmeticReductionCost(Instruction::Or,
                                       cast<VectorType>(ICA.getArgTypes()[1]),
-                                      ICA.getFlags(), CostKind);
-  }
+                                      std::nullopt, CostKind);
+  case Intrinsic::vp_reduce_xor:
+    return getArithmeticReductionCost(Instruction::Xor,
+                                      cast<VectorType>(ICA.getArgTypes()[1]),
+                                      std::nullopt, CostKind);
   case Intrinsic::vp_reduce_smax:
+    return getMinMaxReductionCost(Intrinsic::smax,
+                                  cast<VectorType>(ICA.getArgTypes()[1]),
+                                  ICA.getFlags(), CostKind);
   case Intrinsic::vp_reduce_smin:
+    return getMinMaxReductionCost(Intrinsic::smin,
+                                  cast<VectorType>(ICA.getArgTypes()[1]),
+                                  ICA.getFlags(), CostKind);
   case Intrinsic::vp_reduce_umax:
+    return getMinMaxReductionCost(Intrinsic::umax,
+                                  cast<VectorType>(ICA.getArgTypes()[1]),
+                                  ICA.getFlags(), CostKind);
   case Intrinsic::vp_reduce_umin:
+    return getMinMaxReductionCost(Intrinsic::umin,
+                                  cast<VectorType>(ICA.getArgTypes()[1]),
+                                  ICA.getFlags(), CostKind);
   case Intrinsic::vp_reduce_fmax:
+    return getMinMaxReductionCost(Intrinsic::maxnum,
+                                  cast<VectorType>(ICA.getArgTypes()[1]),
+                                  ICA.getFlags(), CostKind);
   case Intrinsic::vp_reduce_fmaximum:
+    return getMinMaxReductionCost(Intrinsic::maximum,
+                                  cast<VectorType>(ICA.getArgTypes()[1]),
+                                  ICA.getFlags(), CostKind);
   case Intrinsic::vp_reduce_fmin:
-  case Intrinsic::vp_reduce_fminimum: {
-    unsigned IID;
-    switch (ICA.getID()) {
-    case Intrinsic::vp_reduce_smax:
-      IID = Intrinsic::smax;
-      break;
-    case Intrinsic::vp_reduce_smin:
-      IID = Intrinsic::smin;
-      break;
-    case Intrinsic::vp_reduce_umax:
-      IID = Intrinsic::umax;
-      break;
-    case Intrinsic::vp_reduce_umin:
-      IID = Intrinsic::umin;
-      break;
-    case Intrinsic::vp_reduce_fmax:
-      IID = Intrinsic::maxnum;
-      break;
-    case Intrinsic::vp_reduce_fmaximum:
-      IID = Intrinsic::maximum;
-      break;
-    case Intrinsic::vp_reduce_fmin:
-      IID = Intrinsic::minnum;
-      break;
-    case Intrinsic::vp_reduce_fminimum:
-      IID = Intrinsic::minimum;
-      break;
-    }
-    return getMinMaxReductionCost(IID, cast<VectorType>(ICA.getArgTypes()[1]),
+    return getMinMaxReductionCost(Intrinsic::minnum,
+                                  cast<VectorType>(ICA.getArgTypes()[1]),
+                                  ICA.getFlags(), CostKind);
+  case Intrinsic::vp_reduce_fminimum:
+    return getMinMaxReductionCost(Intrinsic::minimum,
+                                  cast<VectorType>(ICA.getArgTypes()[1]),
                                   ICA.getFlags(), CostKind);
-  }
   }
 
   if (ST->hasVInstructions() && RetTy->isVectorTy()) {

>From 93cbca811644a7856ce677d1d1f5fd4c4203a792 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Wed, 30 Oct 2024 18:38:08 -0700
Subject: [PATCH 5/7] Fixup! using helper function to get the reduction opcode.

---
 .../Target/RISCV/RISCVTargetTransformInfo.cpp | 64 ++++++-------------
 1 file changed, 19 insertions(+), 45 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index bdb8d031fad31f..2bbb3c4e7bc1ba 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1192,66 +1192,40 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                               ICA.getArgTypes()[0], CmpInst::BAD_ICMP_PREDICATE,
                               CostKind);
   case Intrinsic::vp_reduce_add:
-    return getArithmeticReductionCost(Instruction::Add,
-                                      cast<VectorType>(ICA.getArgTypes()[1]),
-                                      std::nullopt, CostKind);
   case Intrinsic::vp_reduce_fadd:
-    return getArithmeticReductionCost(Instruction::FAdd,
-                                      cast<VectorType>(ICA.getArgTypes()[1]),
-                                      ICA.getFlags(), CostKind);
   case Intrinsic::vp_reduce_mul:
-    return getArithmeticReductionCost(Instruction::Mul,
-                                      cast<VectorType>(ICA.getArgTypes()[1]),
-                                      std::nullopt, CostKind);
   case Intrinsic::vp_reduce_fmul:
-    return getArithmeticReductionCost(Instruction::FMul,
-                                      cast<VectorType>(ICA.getArgTypes()[1]),
-                                      ICA.getFlags(), CostKind);
   case Intrinsic::vp_reduce_and:
-    return getArithmeticReductionCost(Instruction::And,
-                                      cast<VectorType>(ICA.getArgTypes()[1]),
-                                      std::nullopt, CostKind);
   case Intrinsic::vp_reduce_or:
-    return getArithmeticReductionCost(Instruction::Or,
-                                      cast<VectorType>(ICA.getArgTypes()[1]),
-                                      std::nullopt, CostKind);
-  case Intrinsic::vp_reduce_xor:
-    return getArithmeticReductionCost(Instruction::Xor,
-                                      cast<VectorType>(ICA.getArgTypes()[1]),
-                                      std::nullopt, CostKind);
+  case Intrinsic::vp_reduce_xor: {
+    std::optional<Intrinsic::ID> RedID =
+        VPIntrinsic::getFunctionalIntrinsicIDForVP(ICA.getID());
+    assert(RedID.has_value());
+    unsigned RedOp = getArithmeticReductionInstruction(*RedID);
+    if (RedOp == Instruction::FAdd || RedOp == Instruction::FMul)
+      return getArithmeticReductionCost(RedOp,
+                                        cast<VectorType>(ICA.getArgTypes()[1]),
+                                        ICA.getFlags(), CostKind);
+    return getArithmeticReductionCost(
+        RedOp, cast<VectorType>(ICA.getArgTypes()[1]), std::nullopt, CostKind);
+  }
   case Intrinsic::vp_reduce_smax:
-    return getMinMaxReductionCost(Intrinsic::smax,
-                                  cast<VectorType>(ICA.getArgTypes()[1]),
-                                  ICA.getFlags(), CostKind);
   case Intrinsic::vp_reduce_smin:
-    return getMinMaxReductionCost(Intrinsic::smin,
-                                  cast<VectorType>(ICA.getArgTypes()[1]),
-                                  ICA.getFlags(), CostKind);
   case Intrinsic::vp_reduce_umax:
-    return getMinMaxReductionCost(Intrinsic::umax,
-                                  cast<VectorType>(ICA.getArgTypes()[1]),
-                                  ICA.getFlags(), CostKind);
   case Intrinsic::vp_reduce_umin:
-    return getMinMaxReductionCost(Intrinsic::umin,
-                                  cast<VectorType>(ICA.getArgTypes()[1]),
-                                  ICA.getFlags(), CostKind);
   case Intrinsic::vp_reduce_fmax:
-    return getMinMaxReductionCost(Intrinsic::maxnum,
-                                  cast<VectorType>(ICA.getArgTypes()[1]),
-                                  ICA.getFlags(), CostKind);
   case Intrinsic::vp_reduce_fmaximum:
-    return getMinMaxReductionCost(Intrinsic::maximum,
-                                  cast<VectorType>(ICA.getArgTypes()[1]),
-                                  ICA.getFlags(), CostKind);
   case Intrinsic::vp_reduce_fmin:
-    return getMinMaxReductionCost(Intrinsic::minnum,
-                                  cast<VectorType>(ICA.getArgTypes()[1]),
-                                  ICA.getFlags(), CostKind);
-  case Intrinsic::vp_reduce_fminimum:
-    return getMinMaxReductionCost(Intrinsic::minimum,
+  case Intrinsic::vp_reduce_fminimum: {
+    std::optional<Intrinsic::ID> RedID =
+        VPIntrinsic::getFunctionalIntrinsicIDForVP(ICA.getID());
+    assert(RedID.has_value());
+    Intrinsic::ID MinMaxID = getMinMaxReductionIntrinsicOp(*RedID);
+    return getMinMaxReductionCost(MinMaxID,
                                   cast<VectorType>(ICA.getArgTypes()[1]),
                                   ICA.getFlags(), CostKind);
   }
+  }
 
   if (ST->hasVInstructions() && RetTy->isVectorTy()) {
     if (auto LT = getTypeLegalizationCost(RetTy);

>From 5c41bc3cfd63073f8ae4d9efcb36f66af616c09a Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Tue, 5 Nov 2024 18:10:24 -0800
Subject: [PATCH 6/7] Fixup! Revert changes of original run lines and add new
 runs for type-based query.

The new RUN lines check that the instruction costs from type-based queries
are the same as those from normal queries.
---
 llvm/test/Analysis/CostModel/RISCV/reduce-add.ll          | 5 +++++
 llvm/test/Analysis/CostModel/RISCV/reduce-and.ll          | 5 +++++
 llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll         | 4 ++++
 llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll     | 5 +++++
 llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll     | 5 +++++
 llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll         | 4 ++++
 llvm/test/Analysis/CostModel/RISCV/reduce-max.ll          | 5 +++++
 llvm/test/Analysis/CostModel/RISCV/reduce-min.ll          | 5 +++++
 llvm/test/Analysis/CostModel/RISCV/reduce-or.ll           | 5 +++++
 llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll  | 5 +++++
 llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll | 5 +++++
 llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll          | 5 +++++
 12 files changed, 58 insertions(+)

diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
index 70687da17eb1a5..3a1d361f428a9a 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
@@ -1,4 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; Check if type-based queries have same instruction cost.
 ; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
 ; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
 ; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
index 47ae0bfa58cf9a..5b0384523bccfc 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
@@ -1,4 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; Check if type-based queries have same instruction cost.
 ; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
 ; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
 ; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
index 32b62be3afedb2..d1d5934b8a8f16 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
@@ -1,4 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFH
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFHMIN
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s  --check-prefix=SIZE
+; Check if type-based queries have same instruction cost.
 ; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFH
 ; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFHMIN
 ; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s  --check-prefix=SIZE
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
index c28fdfbfddac1a..ec6356896b8c2d 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
@@ -1,4 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; Check if type-based queries have same instruction cost.
 ; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
 ; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
 ; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
index 99c9f5d89f6963..9a6918ff51b3bb 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
@@ -1,4 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; Check if type-based queries have same instruction cost.
 ; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
 ; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
 ; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll
index 35a5343b6cc65d..4f702691f92ebf 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll
@@ -1,4 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFH
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFHMIN
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s  --check-prefix=SIZE
+; Check if type-based queries have same instruction cost.
 ; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFH
 ; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFHMIN
 ; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s  --check-prefix=SIZE
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
index e0f98a759cc89b..42462d2479fe77 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
@@ -1,4 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; Check if type-based queries have same instruction cost.
 ; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
 ; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
 ; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
index 3e6a19e86a904a..8b0cfa6ddda8c6 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
@@ -1,4 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; Check if type-based queries have same instruction cost.
 ; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
 ; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
 ; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
index 69805e79641011..683aaaa7a20ed9 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
@@ -1,4 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; Check if type-based queries have same instruction cost.
 ; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
 ; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
 ; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
index 74dbcfae93f858..659639bdc1e2e9 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
@@ -1,4 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; Check if type-based queries have same instruction cost.
 ; RUN: opt < %s -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
 ; RUN: opt < %s -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
 ; RUN: opt < %s -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll
index 6cd817a93552b7..9a2d1baa583e12 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll
@@ -1,4 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; Check if type-based queries have same instruction cost.
 ; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
 ; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
 ; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll
index 1ea5bcdf8ef9d9..8a85c8b7817374 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll
@@ -1,4 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; Check if type-based queries have same instruction cost.
 ; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
 ; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
 ; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \

>From 9f5ffa6a33a663fb7af3fb4f72f607b6a64e318a Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Wed, 6 Nov 2024 21:34:07 -0800
Subject: [PATCH 7/7] Fixup! Passing FMF flags.

---
 llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 2bbb3c4e7bc1ba..5d21bb611df4e4 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1202,12 +1202,9 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
         VPIntrinsic::getFunctionalIntrinsicIDForVP(ICA.getID());
     assert(RedID.has_value());
     unsigned RedOp = getArithmeticReductionInstruction(*RedID);
-    if (RedOp == Instruction::FAdd || RedOp == Instruction::FMul)
-      return getArithmeticReductionCost(RedOp,
-                                        cast<VectorType>(ICA.getArgTypes()[1]),
-                                        ICA.getFlags(), CostKind);
-    return getArithmeticReductionCost(
-        RedOp, cast<VectorType>(ICA.getArgTypes()[1]), std::nullopt, CostKind);
+    return getArithmeticReductionCost(RedOp,
+                                      cast<VectorType>(ICA.getArgTypes()[1]),
+                                      ICA.getFlags(), CostKind);
   }
   case Intrinsic::vp_reduce_smax:
   case Intrinsic::vp_reduce_smin: